From e1656a4911d88ce30066392f91f6b879ca350077 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 09:44:50 -0700 Subject: [PATCH 01/65] Remove exec() from atom.py --- propka/atom.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index b93f051..78a6a50 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -290,8 +290,9 @@ class Atom(object): self.occ = self.occ.replace('LG', 'non_titratable_ligand') # try to initialise the group try: - # TODO - get rid of this exec() statement for security reasons - exec('self.group = propka.group.%s_group(self)' % self.occ) + group_attr = "%s_group" % self.occ + group_attr = getattr(propka.group, group_attr) + self.group = group_attr(self) except: raise Exception('%s in input_file is not recognized as a group' % self.occ) # set the model pKa value From f2aef4ce4f8e36e14d0c7a9ad713ef63e9a29cad Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 09:57:53 -0700 Subject: [PATCH 02/65] De-lint bonds.py --- propka/bonds.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/propka/bonds.py b/propka/bonds.py index 8e7ef66..f8f5e03 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -1,16 +1,10 @@ """PROPKA representation of bonds.""" -# TODO - is pickle still used? -import pickle -# TODO - eliminate use of sys -import sys -# TODO - eliminate use of os -import os import math import json import pkg_resources import propka.calculations # TODO - replace the info/warning imports with logging functionality -from propka.lib import info, warning +from propka.lib import info # TODO - should these constants be defined higher up in the module? @@ -32,11 +26,11 @@ class BondMaker: # predefined bonding distances self.distances = {'S-S' : DISULFIDE_DISTANCE, 'F-F' : FLUORIDE_DISTANCE} self.distances_squared = {} - for k in self.distances.keys(): - self.distances_squared[k] = self.distances[k] * self.distances[k] - self.H_dist = HYDROGEN_DISTANCE + for key in self.distances: + self.distances_squared[key] = self.distances[key] * self.distances[key] + self.h_dist = HYDROGEN_DISTANCE self.default_dist = DEFAULT_DISTANCE - self.H_dist_squared = self.H_dist * self.H_dist + self.h_dist_squared = self.h_dist * self.h_dist self.default_dist_squared = self.default_dist * self.default_dist distances = list(self.distances_squared.values()) + [self.default_dist_squared] self.max_sq_distance = max(distances) @@ -74,6 +68,10 @@ class BondMaker: self.num_pi_elec_conj_bonds_ligands = {'N.am': 1, 'N.pl3': 1} self.backbone_atoms = list(self.intra_residue_backbone_bonds.keys()) self.terminal_oxygen_names = ['OXT', 'O\'\''] + self.boxes = {} + self.num_box_x = None + self.num_box_y = None + self.num_box_z = None def find_bonds_for_protein(self, protein): """Finds bonds proteins based on the way atoms normally bond in proteins. @@ -289,7 +287,7 @@ class BondMaker: return False key = '%s-%s' % (atom1.element, atom2.element) h_count = key.count('H') - if sq_dist < self.H_dist_squared and h_count == 1: + if sq_dist < self.h_dist_squared and h_count == 1: return True if sq_dist < self.default_dist_squared and h_count == 0: return True @@ -370,16 +368,17 @@ class BondMaker: z: box z-coordinates atom: the atom to place in a box """ - for bx in [x, x+1]: - for by in [y, y+1]: - for bz in [z, z+1]: - key = (bx, by, bz) + for box_x in [x, x+1]: + for box_y in [y, y+1]: + for box_z in [z, z+1]: + key = (box_x, box_y, box_z) try: self.boxes[key].append(atom) except KeyError: pass - def has_bond(self, atom1, atom2): + @classmethod + def has_bond(cls, atom1, atom2): """Look for bond between two atoms. Args: @@ -392,7 +391,8 @@ class BondMaker: return True return False - def make_bond(self, atom1, atom2): + @classmethod + def make_bond(cls, atom1, atom2): """Makes a bond between atom1 and atom2 Args: From f444d138ee4eaf03ef5adf239b8f9ee59c3e7500 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 12:34:31 -0700 Subject: [PATCH 03/65] Partial de-lint of calculations.py. This really should be 3 separate files. The initial de-lint focused on the distance and hydrogen addition functions. --- propka/calculations.py | 440 +++++++++++++++++++++-------------------- 1 file changed, 230 insertions(+), 210 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index ca0f5a6..9ee4e10 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -7,6 +7,72 @@ import propka.bonds from propka.lib import info, warning +# TODO - this file should be broken into three separate files: +# * calculations.py - includes basic functions for calculating distances, etc. +# * hydrogen.py - includes bonding and protonation functions +# * energy.py - includes energy functions (dependent on distance functions) + + +# TODO - the next set of functions form a distinct "module" for distance calculation + + +# Maximum distance used to bound calculations of smallest distance +MAX_DISTANCE = 1e6 + + +def squared_distance(atom1, atom2): + """Calculate the squared distance between two atoms. + + Args: + atom1: first atom for distance calculation + atom2: second atom for distance calculation + Returns: + distance squared + """ + dx = atom2.x - atom1.x + dy = atom2.y - atom1.y + dz = atom2.z - atom1.z + res = dx*dx+dy*dy+dz*dz + return res + + +def distance(atom1, atom2): + """Calculate the distance between two atoms. + + Args: + atom1: first atom for distance calculation + atom2: second atom for distance calculation + Returns: + distance + """ + return math.sqrt(squared_distance(atom1,atom2)) + + +def get_smallest_distance(atoms1, atoms2): + """Calculate the smallest distance between two groups of atoms. + + Args: + atoms1: atom group 1 + atoms2: atom group 2 + Returns: + smallest distance between groups + """ + res_distance = MAX_DISTANCE + res_atom1 = None + res_atom2 = None + for atom1 in atoms1: + for atom2 in atoms2: + dist = squared_distance(atom1, atom2) + if dist < res_distance: + res_distance = dist + res_atom1 = atom1 + res_atom2 = atom2 + return [res_atom1, math.sqrt(res_distance), res_atom2] + + +# TODO - the next set of functions form a distinct "module" for hydrogen addition + + def setup_bonding_and_protonation(parameters, molecular_container): """Set up bonding and protonation for a molecule. @@ -48,256 +114,245 @@ def set_ligand_atom_names(molecular_container): molecular_container.conformations[name].set_ligand_atom_names() -def addArgHydrogen(residue): - """ - Adds Arg hydrogen atoms to residues according to the 'old way'. +def add_arg_hydrogen(residue): + """Adds Arg hydrogen atoms to residues according to the 'old way'. + + Args: + residue: arginine residue to protonate + Returns: + list of hydrogen atoms """ #info('Adding arg H',residue) for atom in residue: - if atom.name == "CD": - CD = atom + if atom.name == "CD": + cd_atom = atom elif atom.name == "CZ": - CZ = atom + cz_atom = atom elif atom.name == "NE": - NE = atom + ne_atom = atom elif atom.name == "NH1": - NH1 = atom + nh1_atom = atom elif atom.name == "NH2": - NH2 = atom + nh2_atom = atom + h1_atom = protonate_sp2(cd_atom, ne_atom, cz_atom) + h1_atom.name = "HE" + h2_atom = protonate_direction(nh1_atom, ne_atom, cz_atom) + h2_atom.name = "HN1" + h3_atom = protonate_direction(nh1_atom, ne_atom, cd_atom) + h3_atom.name = "HN2" + h4_atom = protonate_direction(nh2_atom, ne_atom, cz_atom) + h4_atom.name = "HN3" + h5_atom = protonate_direction(nh2_atom, ne_atom, h1_atom) + h5_atom.name = "HN4" + return [h1_atom, h2_atom, h3_atom, h4_atom, h5_atom] - H1 = protonateSP2([CD, NE, CZ]) - H1.name = "HE" - H2 = protonateDirection([NH1, NE, CZ]) - H2.name = "HN1" - H3 = protonateDirection([NH1, NE, CD]) - H3.name = "HN2" - H4 = protonateDirection([NH2, NE, CZ]) - H4.name = "HN3" - H5 = protonateDirection([NH2, NE, H1]) - H5.name = "HN4" - return [H1,H2,H3,H4,H5] +def add_his_hydrogen(residue): + """Adds His hydrogen atoms to residues according to the 'old way'. -def addHisHydrogen(residue): - """ - Adds His hydrogen atoms to residues according to the 'old way'. + Args: + residue: histidine residue to protonate """ for atom in residue: - if atom.name == "CG": - CG = atom + if atom.name == "CG": + cg_atom = atom elif atom.name == "ND1": - ND = atom + nd_atom = atom elif atom.name == "CD2": - CD = atom + cd_atom = atom elif atom.name == "CE1": - CE = atom + ce_atom = atom elif atom.name == "NE2": - NE = atom - HD = protonateSP2([CG, ND, CE]) - HD.name = "HND" - HE = protonateSP2([CD, NE, CE]) - HE.name = "HNE" - return + ne_atom = atom + hd_atom = protonate_sp2(cg_atom, nd_atom, ce_atom) + hd_atom.name = "HND" + he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) + he_atom.name = "HNE" -def addTrpHydrogen(residue): + +def add_trp_hydrogen(residue): + """Adds Trp hydrogen atoms to residues according to the 'old way'. + + Args: + residue: tryptophan residue to protonate """ - Adds Trp hydrogen atoms to residues according to the 'old way'. - """ - CD = None - NE = None - DE = None + cd_atom = None + ne_atom = None for atom in residue: - if atom.name == "CD1": - CD = atom + if atom.name == "CD1": + cd_atom = atom elif atom.name == "NE1": - NE = atom + ne_atom = atom elif atom.name == "CE2": - CE = atom - if CD == None or NE == None or CE == None: - str = "Did not find all atoms in %s%4d - in %s" % (self.res_name, self.res_num, "addTrpHydrogen()") - info(str) - sys.exit(0) + ce_atom = atom + if (cd_atom is None) or (ne_atom is None) or (ce_atom is None): + errstr = "Unable to find all atoms for %s %s" % (residue[0].res_name, + residue[0].res_num) + raise ValueError(errstr) + he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) + he_atom.name = "HNE" - HE = protonateSP2([CD, NE, CE]) - HE.name = "HNE" - return +def add_amd_hydrogen(residue): + """Adds Gln & Asn hydrogen atoms to residues according to the 'old way'. -def addAmdHydrogen(residue): + Args: + residue: glutamine or asparagine residue to protonate """ - Adds Gln & Asn hydrogen atoms to residues according to the 'old way'. - """ - C = None - O = None - N = None + c_atom = None + o_atom = None + n_atom = None for atom in residue: - if (atom.res_name == "GLN" and atom.name == "CD") or (atom.res_name == "ASN" and atom.name == "CG"): - C = atom + if (atom.res_name == "GLN" and atom.name == "CD") or (atom.res_name == "ASN" and atom.name == "CG"): + c_atom = atom elif (atom.res_name == "GLN" and atom.name == "OE1") or (atom.res_name == "ASN" and atom.name == "OD1"): - O = atom + o_atom = atom elif (atom.res_name == "GLN" and atom.name == "NE2") or (atom.res_name == "ASN" and atom.name == "ND2"): - N = atom + n_atom = atom + if (c_atom is None) or (o_atom is None) or (n_atom is None): + errstr = "Unable to find all atoms for %s %s" % (residue[0].res_name, + residue[0].res_num) + raise ValueError(errstr) + h1_atom = protonate_direction(n_atom, o_atom, c_atom) + h1_atom.name = "HN1" + h2_atom = protonate_average_direction(n_atom, c_atom, o_atom) + h2_atom.name = "HN2" - if C == None or O == None or N == None: - str = "Did not find N, C and/or O in %s%4d - in %s" % (atom.res_name, atom.res_num, "addAmdHydrogen()") - info(str) - sys.exit(0) - H1 = protonateDirection([N, O, C]) - H1.name = "HN1" - H2 = protonateAverageDirection([N, C, O]) - H2.name = "HN2" +def add_backbone_hydrogen(residue, o_atom, c_atom): + """Adds hydrogen backbone atoms to residues according to the old way. - return + dR is wrong for the N-terminus (i.e. first residue) but it doesn't affect + anything at the moment. Could be improved, but works for now. -def addBackBoneHydrogen(residue, O, C): + Args: + residue: residue to protonate + o_atom: backbone oxygen atom + c_atom: backbone carbon atom + Returns: + [new backbone oxygen atom, new backbone carbon atom] """ - Adds hydrogen backbone atoms to residues according to the old way; dR is wrong for the N-terminus - (i.e. first residue) but it doesn't affect anything at the moment. Could be improved, but works - for now. - """ - - new_C = None - new_O = None - N = None - - + new_c_atom = None + new_o_atom = None + n_atom = None for atom in residue: if atom.name == "N": - N = atom + n_atom = atom if atom.name == "C": - new_C = atom + new_c_atom = atom if atom.name == "O": - new_O = atom - - - - - if None in [C,O,N]: - return [new_O,new_C] - - - if N.res_name == "PRO": - """ PRO doesn't have an H-atom; do nothing """ + new_o_atom = atom + if None in [c_atom, o_atom, n_atom]: + return [new_o_atom, new_c_atom] + if n_atom.res_name == "PRO": + """PRO doesn't have an H-atom; do nothing""" else: - H = protonateDirection([N, O, C]) - H.name = "H" - - return [new_O,new_C] + h_atom = protonate_direction(n_atom, o_atom, c_atom) + h_atom.name = "H" + return [new_o_atom,new_c_atom] +def protonate_direction(x1_atom, x2_atom, x3_atom): + """Protonates an atom, x1_atom, given a direction. + New direction for x1_atom proton is (x2_atom -> x3_atom). - -def protonateDirection(list): + Args: + x1_atom: atom to be protonated + x2_atom: atom for direction + x3_atom: other atom for direction + Returns: + new hydrogen atom """ - Protonates an atom, X1, given a direction (X2 -> X3) [X1, X2, X3] - """ - X1 = list[0] - X2 = list[1] - X3 = list[2] - - dX = (X3.x - X2.x) - dY = (X3.y - X2.y) - dZ = (X3.z - X2.z) + dX = (x3_atom.x - x2_atom.x) + dY = (x3_atom.y - x2_atom.y) + dZ = (x3_atom.z - x2_atom.z) length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = X1.x + dX/length - y = X1.y + dY/length - z = X1.z + dZ/length - - - H = make_new_H(X1,x,y,z) + x = x1_atom.x + dX/length + y = x1_atom.y + dY/length + z = x1_atom.z + dZ/length + H = make_new_h(x1_atom,x,y,z) H.name = "H" - - return H -def protonateAverageDirection(list): - """ - Protonates an atom, X1, given a direction (X1/X2 -> X3) [X1, X2, X3] - Note, this one uses the average of X1 & X2 (N & O) unlike the previous - N - C = O - """ - X1 = list[0] - X2 = list[1] - X3 = list[2] +def protonate_average_direction(x1_atom, x2_atom, x3_atom): + """Protonates an atom, x1_atom, given a direction. - dX = (X3.x + X1.x)*0.5 - X2.x - dY = (X3.y + X1.y)*0.5 - X2.y - dZ = (X3.z + X1.z)*0.5 - X2.z + New direction for x1_atom is (x1_atom/x2_atom -> x3_atom). + Note, this one uses the average of x1_atom & x2_atom (N & O) unlike + the previous N - C = O + Args: + x1_atom: atom to be protonated + x2_atom: atom for direction + x3_atom: other atom for direction + Returns: + new hydrogen atom + """ + dX = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x + dY = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y + dZ = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = X1.x + dX/length - y = X1.y + dY/length - z = X1.z + dZ/length - - H = make_new_H(X1,x,y,z) + x = x1_atom.x + dX/length + y = x1_atom.y + dY/length + z = x1_atom.z + dZ/length + H = make_new_h(x1_atom,x,y,z) H.name = "H" - - - return H -def protonateSP2(list): - """ - Protonates a SP2 atom, H2, given a list of [X1, X2, X3] - X1-X2-X3 - """ - X1 = list[0] - X2 = list[1] - X3 = list[2] +def protonate_sp2(x1_atom, x2_atom, x3_atom): + """Protonates a SP2 atom, given a list of atoms - dX = (X1.x + X3.x)*0.5 - X2.x - dY = (X1.y + X3.y)*0.5 - X2.y - dZ = (X1.z + X3.z)*0.5 - X2.z + Args: + x1_atom: atom to set direction + x2_atom: atom to be protonated + x3_atom: other atom to set direction + Returns: + new hydrogen atom + """ + dX = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x + dY = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y + dZ = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = X2.x - dX/length - y = X2.y - dY/length - z = X2.z - dZ/length - - H = make_new_H(X2,x,y,z) + x = x2_atom.x - dX/length + y = x2_atom.y - dY/length + z = x2_atom.z - dZ/length + H = make_new_h(x2_atom,x,y,z) H.name = "H" - return H -def make_new_H(atom, x,y,z): +def make_new_h(atom, x,y,z): + """Add a new hydrogen to an atom at the specified position. + Args: + atom: atom to protonate + x: x position of hydrogen + y: y position of hydrogen + z: z position of hydrogen + Returns: + new hydrogen atom + """ new_H = propka.atom.Atom() - new_H.set_property(numb = None, - name = 'H%s'%atom.name[1:], - res_name = atom.res_name, - chain_id = atom.chain_id, - res_num = atom.res_num, - x = x, - y = y, - z = z, - occ = None, - beta = None) + new_H.set_property(numb=None, name='H%s' % atom.name[1:], + res_name=atom.res_name, chain_id=atom.chain_id, + res_num=atom.res_num, x=x, y=y, z=z, occ=None, + beta=None) new_H.element = 'H' - - new_H.bonded_atoms = [atom] new_H.charge = 0 new_H.steric_number = 0 new_H.number_of_lone_pairs = 0 new_H.number_of_protons_to_add = 0 new_H.num_pi_elec_2_3_bonds = 0 - atom.bonded_atoms.append(new_H) atom.conformation_container.add_atom(new_H) - return new_H -######## 3.0 style protonation methods end - - - - - +# TODO - the remaining functions form a dist # # Desolvation methods @@ -416,41 +471,6 @@ def calculate_weight(parameters, Nmass): return weight - -def squared_distance(atom1, atom2): -# if atom1 in atom2.squared_distances: -# return atom2.squared_distances[atom1] - - dx = atom2.x - atom1.x - dy = atom2.y - atom1.y - dz = atom2.z - atom1.z - - res = dx*dx+dy*dy+dz*dz - - return res - - -def distance(atom1, atom2): - return math.sqrt(squared_distance(atom1,atom2)) - - - -def get_smallest_distance(atoms1, atoms2): - res_distance =1e6 - res_atom1 = None - res_atom2 = None - - for atom1 in atoms1: - for atom2 in atoms2: - dist = squared_distance(atom1, atom2) - if dist < res_distance: - res_distance = dist - res_atom1 = atom1 - res_atom2 = atom2 - - return [res_atom1, math.sqrt(res_distance), res_atom2] - - def calculatePairWeight(parameters, Nmass1, Nmass2): """ calculating the atom-pair based desolvation weight From db8571414e01b7451790c54450f90aa875cbd180 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 13:11:38 -0700 Subject: [PATCH 04/65] Convert classmethod to staticmethod. Addresses https://github.com/jensengroup/propka-3.1/pull/40#discussion_r429570235 and https://github.com/jensengroup/propka-3.1/pull/40#discussion_r429570322 --- propka/bonds.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/propka/bonds.py b/propka/bonds.py index f8f5e03..d77645b 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -377,8 +377,8 @@ class BondMaker: except KeyError: pass - @classmethod - def has_bond(cls, atom1, atom2): + @staticmethod + def has_bond(atom1, atom2): """Look for bond between two atoms. Args: @@ -391,8 +391,8 @@ class BondMaker: return True return False - @classmethod - def make_bond(cls, atom1, atom2): + @staticmethod + def make_bond(atom1, atom2): """Makes a bond between atom1 and atom2 Args: From 02ca72d91d52838ee4ee2fe978b5dd3bc4a75074 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 13:16:18 -0700 Subject: [PATCH 05/65] Remove self.h_dist attribute. Only used in __init__ function. Addresses https://github.com/jensengroup/propka-3.1/pull/40#discussion_r429570945 --- propka/bonds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/propka/bonds.py b/propka/bonds.py index d77645b..189f9cb 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -28,9 +28,9 @@ class BondMaker: self.distances_squared = {} for key in self.distances: self.distances_squared[key] = self.distances[key] * self.distances[key] - self.h_dist = HYDROGEN_DISTANCE + h_dist = HYDROGEN_DISTANCE self.default_dist = DEFAULT_DISTANCE - self.h_dist_squared = self.h_dist * self.h_dist + self.h_dist_squared = h_dist * h_dist self.default_dist_squared = self.default_dist * self.default_dist distances = list(self.distances_squared.values()) + [self.default_dist_squared] self.max_sq_distance = max(distances) From 1611e9d6eadbd3ed8ebf811bbee6c7b26cf7dbf4 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 13:24:05 -0700 Subject: [PATCH 06/65] Partial de-lint of calculations.py. Pausing here since I just deleted contactDesolvation(). However, there was no way that anyone was using this function since it was very badly broken and could not run. --- propka/calculations.py | 86 +++++++----------------------------------- 1 file changed, 14 insertions(+), 72 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index 9ee4e10..095c6f4 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -352,26 +352,32 @@ def make_new_h(atom, x,y,z): return new_H -# TODO - the remaining functions form a dist +# TODO - the remaining functions form a distinct "module" for desolvation -# -# Desolvation methods -# + +MYSTERY_MIN_DISTANCE = 2.75 +MIN_DISTANCE_4TH = math.pow(MYSTERY_MIN_DISTANCE, 4) def radial_volume_desolvation(parameters, group): + """Calculate desolvation terms for group. + + Args: + parameters: parameters for desolvation calculation + group: group of atoms for calculation + """ all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() volume = 0.0 + # TODO - Nathan really wants to rename the Nmass variable. + # He had to re-read the original paper to figure out what it was. + # A better name would be num_volume. group.Nmass = 0 - min_distance_4th = 57.1914 # pow(2.75, 4) - + min_distance_4th = MIN_DISTANCE_4TH for atom in all_atoms: # ignore atoms in the same residue if atom.res_num == group.atom.res_num and atom.chain_id == group.atom.chain_id: continue - sq_dist = squared_distance(group, atom) - # desolvation if sq_dist < parameters.desolv_cutoff_squared: # use a default relative volume of 1.0 if the volume of the element is not found in parameters @@ -380,79 +386,15 @@ def radial_volume_desolvation(parameters, group): dv = parameters.VanDerWaalsVolume['C4'] else: dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0) - dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist) -# dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared) volume += dv_inc # buried if sq_dist < parameters.buried_cutoff_squared: group.Nmass += 1 - group.buried = calculate_weight(parameters, group.Nmass) scale_factor = calculate_scale_factor(parameters, group.buried) volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) - group.Emass = group.charge * parameters.desolvationPrefactor * volume_after_allowance * scale_factor - # Emass, Nmass - # Elocl, Nlocl -> reorganisation energy (count backbone hydorgen bond acceptors, C=O) - - - - #info('%s %5.2f %5.2f %4d'%(group, group.buried, group.Emass, group.Nmass)) - return - - - -def contactDesolvation(parameters, group): - """ - calculates the desolvation according to the Contact Model, the old default - """ - - local_radius = {'ASP': 4.5, - 'GLU': 4.5, - 'HIS': 4.5, - 'CYS': 3.5, - 'TYR': 3.5, - 'LYS': 4.5, - 'ARG': 5.0, - 'C-': 4.5, - 'N+': 4.5} - - all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() - if residue.res_name in version.desolvationRadii: - local_cutoff = version.desolvationRadii[residue.res_name] - else: - local_cutoff = 0.00 - residue.Nmass = 0 - residue.Nlocl = 0 - - for atom in all_atoms: - if atom.res_num != group.atom.res_num or atom.chain_id != group.atom.chain_id: - dX = atom.x - residue.x - dY = atom.y - residue.y - dZ = atom.z - residue.z - distance = math.sqrt(dX*dX + dY*dY + dZ*dZ) - if distance < local_cutoff: - group.Nlocl += 1 - if distance < parameters.buried_cutoff: - group.Nmass += 1 - if residue.Nmass > 400: - group.location = "BURIED " - else: - group.location = "SURFACE" - group.Emass = group.charge * parameters.desolvationPrefactor * max(0.00, group.Nmass-parameters.desolvationAllowance) - group.Elocl = group.charge * parameters.desolvationLocal * group.Nlocl - # Buried ratio - new feature in propka3.0 - # Note, there will be an unforseen problem: e.g. if one residue has Nmass > Nmax and - # the other Nmass < Nmax, the Npair will not be Nmass1 + Nmass2! - residue.buried = calculateWeight(residue.Nmass) - - return 0.00, 0.00, 0.00, 0.00 - - - - - def calculate_scale_factor(parameters, weight): From 5ed77a7cf6b0d976f865f6348a4e6272f67609e5 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 15:02:23 -0700 Subject: [PATCH 07/65] De-lint calculations.py. Changes were made to function names; impacted functions changed in other files. Google searches performed to look for impacts to other software. --- propka/calculations.py | 768 +++++++++++++++++-------------- propka/conformation_container.py | 2 +- propka/determinants.py | 10 +- propka/version.py | 36 +- 4 files changed, 447 insertions(+), 369 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index 095c6f4..acfecbd 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -1,10 +1,8 @@ """PROPKA calculations.""" import math -import copy -import sys import propka.protonate import propka.bonds -from propka.lib import info, warning +from propka.lib import warning # TODO - this file should be broken into three separate files: @@ -45,7 +43,7 @@ def distance(atom1, atom2): Returns: distance """ - return math.sqrt(squared_distance(atom1,atom2)) + return math.sqrt(squared_distance(atom1, atom2)) def get_smallest_distance(atoms1, atoms2): @@ -57,17 +55,17 @@ def get_smallest_distance(atoms1, atoms2): Returns: smallest distance between groups """ - res_distance = MAX_DISTANCE + res_dist = MAX_DISTANCE res_atom1 = None res_atom2 = None for atom1 in atoms1: for atom2 in atoms2: dist = squared_distance(atom1, atom2) - if dist < res_distance: - res_distance = dist + if dist < res_dist: + res_dist = dist res_atom1 = atom1 res_atom2 = atom2 - return [res_atom1, math.sqrt(res_distance), res_atom2] + return [res_atom1, math.sqrt(res_dist), res_atom2] # TODO - the next set of functions form a distinct "module" for hydrogen addition @@ -76,11 +74,15 @@ def get_smallest_distance(atoms1, atoms2): def setup_bonding_and_protonation(parameters, molecular_container): """Set up bonding and protonation for a molecule. + NOTE - the unused `parameters` argument is required for compatibility in version.py + TODO - figure out why there is a similar function in version.py + Args: + parameters: not used molecular_container: molecule container. """ # make bonds - my_bond_maker = setup_bonding(parameters, molecular_container) + my_bond_maker = setup_bonding(molecular_container) # set up ligand atom names set_ligand_atom_names(molecular_container) # apply information on pi electrons @@ -91,9 +93,11 @@ def setup_bonding_and_protonation(parameters, molecular_container): my_protonator.protonate(molecular_container) -def setup_bonding(parameters, molecular_container): +def setup_bonding(molecular_container): """Set up bonding for a molecular container. + TODO - figure out why there is a similar function in version.py + Args: molecular_container: the molecular container in question Returns: @@ -159,11 +163,11 @@ def add_his_hydrogen(residue): elif atom.name == "ND1": nd_atom = atom elif atom.name == "CD2": - cd_atom = atom + cd_atom = atom elif atom.name == "CE1": - ce_atom = atom + ce_atom = atom elif atom.name == "NE2": - ne_atom = atom + ne_atom = atom hd_atom = protonate_sp2(cg_atom, nd_atom, ce_atom) hd_atom.name = "HND" he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) @@ -180,11 +184,11 @@ def add_trp_hydrogen(residue): ne_atom = None for atom in residue: if atom.name == "CD1": - cd_atom = atom + cd_atom = atom elif atom.name == "NE1": - ne_atom = atom + ne_atom = atom elif atom.name == "CE2": - ce_atom = atom + ce_atom = atom if (cd_atom is None) or (ne_atom is None) or (ce_atom is None): errstr = "Unable to find all atoms for %s %s" % (residue[0].res_name, residue[0].res_num) @@ -203,11 +207,14 @@ def add_amd_hydrogen(residue): o_atom = None n_atom = None for atom in residue: - if (atom.res_name == "GLN" and atom.name == "CD") or (atom.res_name == "ASN" and atom.name == "CG"): + if (atom.res_name == "GLN" and atom.name == "CD") \ + or (atom.res_name == "ASN" and atom.name == "CG"): c_atom = atom - elif (atom.res_name == "GLN" and atom.name == "OE1") or (atom.res_name == "ASN" and atom.name == "OD1"): + elif (atom.res_name == "GLN" and atom.name == "OE1") \ + or (atom.res_name == "ASN" and atom.name == "OD1"): o_atom = atom - elif (atom.res_name == "GLN" and atom.name == "NE2") or (atom.res_name == "ASN" and atom.name == "ND2"): + elif (atom.res_name == "GLN" and atom.name == "NE2") \ + or (atom.res_name == "ASN" and atom.name == "ND2"): n_atom = atom if (c_atom is None) or (o_atom is None) or (n_atom is None): errstr = "Unable to find all atoms for %s %s" % (residue[0].res_name, @@ -245,11 +252,12 @@ def add_backbone_hydrogen(residue, o_atom, c_atom): if None in [c_atom, o_atom, n_atom]: return [new_o_atom, new_c_atom] if n_atom.res_name == "PRO": - """PRO doesn't have an H-atom; do nothing""" + # PRO doesn't have an H-atom; do nothing + pass else: h_atom = protonate_direction(n_atom, o_atom, c_atom) h_atom.name = "H" - return [new_o_atom,new_c_atom] + return [new_o_atom, new_c_atom] def protonate_direction(x1_atom, x2_atom, x3_atom): @@ -264,16 +272,16 @@ def protonate_direction(x1_atom, x2_atom, x3_atom): Returns: new hydrogen atom """ - dX = (x3_atom.x - x2_atom.x) - dY = (x3_atom.y - x2_atom.y) - dZ = (x3_atom.z - x2_atom.z) - length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = x1_atom.x + dX/length - y = x1_atom.y + dY/length - z = x1_atom.z + dZ/length - H = make_new_h(x1_atom,x,y,z) - H.name = "H" - return H + dx = (x3_atom.x - x2_atom.x) + dy = (x3_atom.y - x2_atom.y) + dz = (x3_atom.z - x2_atom.z) + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x1_atom.x + dx/length + y = x1_atom.y + dy/length + z = x1_atom.z + dz/length + h_atom = make_new_h(x1_atom, x, y, z) + h_atom.name = "H" + return h_atom def protonate_average_direction(x1_atom, x2_atom, x3_atom): @@ -290,16 +298,16 @@ def protonate_average_direction(x1_atom, x2_atom, x3_atom): Returns: new hydrogen atom """ - dX = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x - dY = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y - dZ = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z - length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = x1_atom.x + dX/length - y = x1_atom.y + dY/length - z = x1_atom.z + dZ/length - H = make_new_h(x1_atom,x,y,z) - H.name = "H" - return H + dx = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x + dy = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y + dz = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x1_atom.x + dx/length + y = x1_atom.y + dy/length + z = x1_atom.z + dz/length + h_atom = make_new_h(x1_atom, x, y, z) + h_atom.name = "H" + return h_atom def protonate_sp2(x1_atom, x2_atom, x3_atom): @@ -312,19 +320,19 @@ def protonate_sp2(x1_atom, x2_atom, x3_atom): Returns: new hydrogen atom """ - dX = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x - dY = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y - dZ = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z - length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = x2_atom.x - dX/length - y = x2_atom.y - dY/length - z = x2_atom.z - dZ/length - H = make_new_h(x2_atom,x,y,z) - H.name = "H" - return H + dx = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x + dy = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y + dz = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x2_atom.x - dx/length + y = x2_atom.y - dy/length + z = x2_atom.z - dz/length + h_atom = make_new_h(x2_atom, x, y, z) + h_atom.name = "H" + return h_atom -def make_new_h(atom, x,y,z): +def make_new_h(atom, x, y, z): """Add a new hydrogen to an atom at the specified position. Args: @@ -335,28 +343,38 @@ def make_new_h(atom, x,y,z): Returns: new hydrogen atom """ - new_H = propka.atom.Atom() - new_H.set_property(numb=None, name='H%s' % atom.name[1:], + new_h = propka.atom.Atom() + new_h.set_property(numb=None, name='H%s' % atom.name[1:], res_name=atom.res_name, chain_id=atom.chain_id, res_num=atom.res_num, x=x, y=y, z=z, occ=None, beta=None) - new_H.element = 'H' - new_H.bonded_atoms = [atom] - new_H.charge = 0 - new_H.steric_number = 0 - new_H.number_of_lone_pairs = 0 - new_H.number_of_protons_to_add = 0 - new_H.num_pi_elec_2_3_bonds = 0 - atom.bonded_atoms.append(new_H) - atom.conformation_container.add_atom(new_H) - return new_H + new_h.element = 'H' + new_h.bonded_atoms = [atom] + new_h.charge = 0 + new_h.steric_number = 0 + new_h.number_of_lone_pairs = 0 + new_h.number_of_protons_to_add = 0 + new_h.num_pi_elec_2_3_bonds = 0 + atom.bonded_atoms.append(new_h) + atom.conformation_container.add_atom(new_h) + return new_h # TODO - the remaining functions form a distinct "module" for desolvation -MYSTERY_MIN_DISTANCE = 2.75 -MIN_DISTANCE_4TH = math.pow(MYSTERY_MIN_DISTANCE, 4) +# TODO - I have no idea what these constants mean so I labeled them "UNK_" +UNK_MIN_DISTANCE = 2.75 +MIN_DISTANCE_4TH = math.pow(UNK_MIN_DISTANCE, 4) +UNK_DIELECTRIC1 = 160 +UNK_DIELECTRIC2 = 30 +UNK_PKA_SCALING1 = 244.12 +UNK_BACKBONE_DISTANCE1 = 6.0 +UNK_BACKBONE_DISTANCE2 = 3.0 +UNK_FANGLE_MIN = 0.001 +UNK_PKA_SCALING2 = 0.8 +COMBINED_NUM_BURIED_MAX = 900 +SEPARATE_NUM_BURIED_MAX = 400 def radial_volume_desolvation(parameters, group): @@ -372,21 +390,23 @@ def radial_volume_desolvation(parameters, group): # He had to re-read the original paper to figure out what it was. # A better name would be num_volume. group.Nmass = 0 - min_distance_4th = MIN_DISTANCE_4TH + min_dist_4th = MIN_DISTANCE_4TH for atom in all_atoms: # ignore atoms in the same residue - if atom.res_num == group.atom.res_num and atom.chain_id == group.atom.chain_id: + if atom.res_num == group.atom.res_num \ + and atom.chain_id == group.atom.chain_id: continue sq_dist = squared_distance(group, atom) # desolvation if sq_dist < parameters.desolv_cutoff_squared: - # use a default relative volume of 1.0 if the volume of the element is not found in parameters + # use a default relative volume of 1.0 if the volume of the element + # is not found in parameters # insert check for methyl groups - if atom.element == 'C' and atom.name not in ['CA','C']: - dv = parameters.VanDerWaalsVolume['C4'] + if atom.element == 'C' and atom.name not in ['CA', 'C']: + dvol = parameters.VanDerWaalsVolume['C4'] else: - dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0) - dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist) + dvol = parameters.VanDerWaalsVolume.get(atom.element, 1.0) + dv_inc = dvol/max(min_dist_4th, sq_dist*sq_dist) volume += dv_inc # buried if sq_dist < parameters.buried_cutoff_squared: @@ -394,417 +414,475 @@ def radial_volume_desolvation(parameters, group): group.buried = calculate_weight(parameters, group.Nmass) scale_factor = calculate_scale_factor(parameters, group.buried) volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) - group.Emass = group.charge * parameters.desolvationPrefactor * volume_after_allowance * scale_factor + group.Emass = group.charge * parameters.desolvationPrefactor \ + * volume_after_allowance * scale_factor def calculate_scale_factor(parameters, weight): + """Calculate desolvation scaling factor. + + Args: + parameters: parameters for desolvation calculation + weight: weight for scaling factor + Returns: + scaling factor + """ scale_factor = 1.0 - (1.0 - parameters.desolvationSurfaceScalingFactor)*(1.0 - weight) return scale_factor -def calculate_weight(parameters, Nmass): +def calculate_weight(parameters, num_volume): + """Calculate the atom-based desolvation weight. + + TODO - figure out why a similar function exists in version.py + + Args: + parameters: parameters for desolvation calculation + num_volume: number of heavy atoms within desolvation calculation volume + Returns: + desolvation weight """ - calculating the atom-based desolvation weight - """ - weight = float(Nmass - parameters.Nmin)/float(parameters.Nmax - parameters.Nmin) + weight = float(num_volume - parameters.Nmin) \ + / float(parameters.Nmax - parameters.Nmin) weight = min(1.0, weight) weight = max(0.0, weight) - return weight -def calculatePairWeight(parameters, Nmass1, Nmass2): +def calculate_pair_weight(parameters, num_volume1, num_volume2): + """Calculate the atom-pair based desolvation weight. + + Args: + num_volume1: number of heavy atoms within first desolvation volume + num_volume2: number of heavy atoms within second desolvation volume + Returns: + desolvation weight """ - calculating the atom-pair based desolvation weight - """ - Nmass = Nmass1 + Nmass2 - Nmin = 2*parameters.Nmin - Nmax = 2*parameters.Nmax - weight = float(Nmass - Nmin)/float(Nmax - Nmin) + num_volume = num_volume1 + num_volume2 + num_min = 2*parameters.Nmin + num_max = 2*parameters.Nmax + weight = float(num_volume - num_min)/float(num_max - num_min) weight = min(1.0, weight) weight = max(0.0, weight) - return weight -def HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle=1.0): +def hydrogen_bond_energy(dist, dpka_max, cutoffs, f_angle=1.0): + """Calculate hydrogen-bond interaction pKa shift. + + Args: + dist: distance for hydrogen bond + dpka_max: maximum pKa value shift + cutoffs: array with max and min distance values + f_angle: angle scaling factor + Returns: + pKa shift value """ - returns a hydrogen-bond interaction pKa shift - """ - if distance < cutoff[0]: + if dist < cutoffs[0]: value = 1.00 - elif distance > cutoff[1]: + elif dist > cutoffs[1]: value = 0.00 else: - value = 1.0-(distance-cutoff[0])/(cutoff[1]-cutoff[0]) - - dpKa = dpka_max*value*f_angle - - return abs(dpKa) + value = 1.0 - (dist - cutoffs[0])/(cutoffs[1] - cutoffs[0]) + dpka = dpka_max*value*f_angle + return abs(dpka) +def angle_distance_factors(atom1=None, atom2=None, atom3=None, center=None): + """Calculate distance and angle factors for three atoms for backbone interactions. + + NOTE - you need to use atom1 to be the e.g. ASP atom if distance is reset at + return: [O1 -- H2-N3]. -def AngleFactorX(atom1=None, atom2=None, atom3=None, center=None): - """ - Calculates the distance and angle-factor from three atoms for back-bone interactions, - IMPORTANT: you need to use atom1 to be the e.g. ASP atom if distance is reset at return: [O1 -- H2-N3] Also generalized to be able to be used for residue 'centers' for C=O COO interactions. + + Args: + atom1: first atom for calculation (could be None) + atom2: second atom for calculation + atom3: third atom for calculation + center: center point between atoms 1 and 2 + Returns + [distance factor between atoms 1 and 2, + angle factor, + distance factor between atoms 2 and 3] """ - - dX_32 = atom2.x - atom3.x - dY_32 = atom2.y - atom3.y - dZ_32 = atom2.z - atom3.z - - distance_23 = math.sqrt( dX_32*dX_32 + dY_32*dY_32 + dZ_32*dZ_32 ) - - dX_32 = dX_32/distance_23 - dY_32 = dY_32/distance_23 - dZ_32 = dZ_32/distance_23 - - if atom1 == None: - dX_21 = center[0] - atom2.x - dY_21 = center[1] - atom2.y - dZ_21 = center[2] - atom2.z - else: - dX_21 = atom1.x - atom2.x - dY_21 = atom1.y - atom2.y - dZ_21 = atom1.z - atom2.z - - distance_12 = math.sqrt( dX_21*dX_21 + dY_21*dY_21 + dZ_21*dZ_21 ) - - dX_21 = dX_21/distance_12 - dY_21 = dY_21/distance_12 - dZ_21 = dZ_21/distance_12 - - f_angle = dX_21*dX_32 + dY_21*dY_32 + dZ_21*dZ_32 - - - return distance_12, f_angle, distance_23 - - - -def hydrogen_bond_interaction(group1, group2, version): - - # find the smallest distance between interacting atoms - atoms1 = group1.get_interaction_atoms(group2) - atoms2 = group2.get_interaction_atoms(group1) - [closest_atom1, distance, closest_atom2] = propka.calculations.get_smallest_distance(atoms1, atoms2) - - if None in [closest_atom1, closest_atom2]: - warning('Side chain interaction failed for %s and %s' % (group1.label, group2.label)) - return None - - # get the parameters - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,closest_atom2) - - if dpka_max==None or None in cutoff: - return None - - # check that the closest atoms are close enough - if distance >= cutoff[1]: - return None - - # check that bond distance criteria is met - bond_distance_too_short = group1.atom.is_atom_within_bond_distance(group2.atom, - version.parameters.min_bond_distance_for_hydrogen_bonds,1) - if bond_distance_too_short: - return None - - # set the angle factor + # The angle factor # # ---closest_atom1/2 # . # . # the_hydrogen---closest_atom2/1--- + dx_32 = atom2.x - atom3.x + dy_32 = atom2.y - atom3.y + dz_32 = atom2.z - atom3.z + dist_23 = math.sqrt(dx_32 * dx_32 + dy_32 * dy_32 + dz_32 * dz_32) + dx_32 = dx_32/dist_23 + dy_32 = dy_32/dist_23 + dz_32 = dz_32/dist_23 + if atom1 is None: + dx_21 = center[0] - atom2.x + dy_21 = center[1] - atom2.y + dz_21 = center[2] - atom2.z + else: + dx_21 = atom1.x - atom2.x + dy_21 = atom1.y - atom2.y + dz_21 = atom1.z - atom2.z + dist_12 = math.sqrt(dx_21 * dx_21 + dy_21 * dy_21 + dz_21 * dz_21) + dx_21 = dx_21/dist_12 + dy_21 = dy_21/dist_12 + dz_21 = dz_21/dist_12 + f_angle = dx_21*dx_32 + dy_21*dy_32 + dz_21*dz_32 + return dist_12, f_angle, dist_23 + + +def hydrogen_bond_interaction(group1, group2, version): + """Calculate energy for hydrogen bond interactions between two groups. + + Args: + group1: first interacting group + group2: second interacting group + version: an object that contains version-specific parameters + Returns: + hydrogen bond interaction energy + """ + # find the smallest distance between interacting atoms + atoms1 = group1.get_interaction_atoms(group2) + atoms2 = group2.get_interaction_atoms(group1) + [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2) + if None in [closest_atom1, closest_atom2]: + warning('Side chain interaction failed for %s and %s' % (group1.label, + group2.label)) + return None + # get the parameters + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, + closest_atom2) + if (dpka_max is None) or (None in cutoff): + return None + # check that the closest atoms are close enough + if dist >= cutoff[1]: + return None + # check that bond distance criteria is met + min_hbond_dist = version.parameters.min_bond_distance_for_hydrogen_bonds + if group1.atom.is_atom_within_bond_distance(group2.atom, min_hbond_dist, 1): + return None + # set angle factor f_angle = 1.0 if group2.type in version.parameters.angular_dependent_sidechain_interactions: if closest_atom2.element == 'H': heavy_atom = closest_atom2.bonded_atoms[0] - hydrogen = closest_atom2 - distance, f_angle, nada = propka.calculations.AngleFactorX(closest_atom1, hydrogen, heavy_atom) + hydrogen = closest_atom2 + dist, f_angle, _ = angle_distance_factors(closest_atom1, hydrogen, + heavy_atom) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), or the heavy atom + # is closer to the titratable atom than the hydrogen. In either + # case, we set the angle factor to 0 f_angle = 0.0 - elif group1.type in version.parameters.angular_dependent_sidechain_interactions: if closest_atom1.element == 'H': heavy_atom = closest_atom1.bonded_atoms[0] - hydrogen = closest_atom1 - distance, f_angle, nada = propka.calculations.AngleFactorX(closest_atom2, hydrogen, heavy_atom) + hydrogen = closest_atom1 + dist, f_angle, _ = angle_distance_factors(closest_atom2, hydrogen, + heavy_atom) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), or the heavy atom + # is closer to the titratable atom than the hydrogen. In either + # case, we set the angle factor to 0 f_angle = 0.0 - - weight = version.calculatePairWeight(group1.Nmass, group2.Nmass) - - exception, value = version.checkExceptions(group1, group2) - #exception = False # circumventing exception - if exception == True: - """ do nothing, value should have been assigned """ - #info(" exception for %s %s %6.2lf" % (group1.label, group2.label, value)) + weight = version.calculate_pair_weight(group1.Nmass, group2.Nmass) + exception, value = version.check_exceptions(group1, group2) + if exception: + # Do nothing, value should have been assigned. + pass else: - value = version.calculateSideChainEnergy(distance, dpka_max, cutoff, weight, f_angle) - - # info('distance',distance) - # info('dpka_max',dpka_max) - # info('cutoff',cutoff) - # info('f_angle',f_angle) - # info('weight',weight) - # info('value',value) - # info('===============================================') - + value = version.calculateSideChainEnergy(dist, dpka_max, cutoff, weight, + f_angle) return value +def electrostatic_interaction(group1, group2, dist, version): + """Calculate electrostatic interaction betwee two groups. -def HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle=1.0): + Args: + group1: first interacting group + group2: second interacting group + dist: distance between groups + version: version-specific object with parameters and functions + Returns: + electrostatic interaction energy or None (if no interaction is appropriate) """ - returns a hydrogen-bond interaction pKa shift - """ - if distance < cutoff[0]: - value = 1.00 - elif distance > cutoff[1]: - value = 0.00 - else: - value = 1.0-(distance-cutoff[0])/(cutoff[1]-cutoff[0]) - - dpKa = dpka_max*value*f_angle - - return abs(dpKa) - - - - -def electrostatic_interaction(group1, group2, distance, version): - # check if we should do coulomb interaction at all - if not version.checkCoulombPair(group1, group2, distance): + if not version.check_coulomb_pair(group1, group2, dist): return None - - weight = version.calculatePairWeight(group1.Nmass, group2.Nmass) - value = version.calculateCoulombEnergy(distance, weight) - + weight = version.calculate_pair_weight(group1.Nmass, group2.Nmass) + value = version.calculate_coulomb_energy(dist, weight) return value -def checkCoulombPair(parameters, group1, group2, distance): - """ - Checks if this Coulomb interaction should be done - a propka2.0 hack - """ - Npair = group1.Nmass + group2.Nmass - do_coulomb = True +def check_coulomb_pair(parameters, group1, group2, dist): + """Checks if this Coulomb interaction should be done. - # check if both groups are titratable (ions are taken care of in determinants::setIonDeterminants) + NOTE - this is a propka2.0 hack + TODO - figure out why a similar function exists in version.py + + Args: + parameters: parameters for Coulomb calculations + group1: first interacting group + group2: second interacting group + dist: distance between groups + Returns: + Boolean + """ + num_volume = group1.Nmass + group2.Nmass + do_coulomb = True + # check if both groups are titratable (ions are taken care of in + # determinants::setIonDeterminants) if not (group1.titratable and group2.titratable): do_coulomb = False - # check if the distance is not too big - if distance > parameters.coulomb_cutoff2: + if dist > parameters.coulomb_cutoff2: do_coulomb = False - # check that desolvation is ok - if Npair < parameters.Nmin: + if num_volume < parameters.Nmin: do_coulomb = False - return do_coulomb -def CoulombEnergy(distance, weight, parameters): - """ - calculates the Coulomb interaction pKa shift based on Coulombs law - eps = 60.0 for the moment; to be scaled with 'weight' - """ - #diel = 80.0 - 60.0*weight +def coulomb_energy(dist, weight, parameters): + """Calculates the Coulomb interaction pKa shift based on Coulomb's law. - diel = 160 - (160 -30)*weight - R = max(distance, parameters.coulomb_cutoff1) - scale = (R - parameters.coulomb_cutoff2)/(parameters.coulomb_cutoff1 -parameters.coulomb_cutoff2) + Args: + dist: distance for electrostatic interaction + weight: scaling of dielectric constant + parameters: parameter object for calculation + Returns: + pKa shift + """ + diel = UNK_DIELECTRIC1 - (UNK_DIELECTRIC1 - UNK_DIELECTRIC2)*weight + dist = max(dist, parameters.coulomb_cutoff1) + scale = (dist - parameters.coulomb_cutoff2)/(parameters.coulomb_cutoff1 \ + - parameters.coulomb_cutoff2) scale = max(0.0, scale) scale = min(1.0, scale) - - dpka = 244.12/(diel*R) *scale - + dpka = UNK_PKA_SCALING1/(diel*dist)*scale return abs(dpka) +def backbone_reorganization(parameters, conformation): + """Perform calculations related to backbone reorganizations. -def BackBoneReorganization(parameters, conformation): - """ - adding test stuff + NOTE - this was described in the code as "adding test stuff" + NOTE - this function does not appear to be used + TODO - figure out why a similar function exists in version.py + + Args: + parameters: not used + conformation: specific molecule conformation """ titratable_groups = conformation.get_backbone_reorganisation_groups() - BBC_groups = conformation.get_backbone_CO_groups() + bbc_groups = conformation.get_backbone_CO_groups() for titratable_group in titratable_groups: weight = titratable_group.buried - dpKa = 0.00 - for BBC_group in BBC_groups: + dpka = 0.00 + for bbc_group in bbc_groups: center = [titratable_group.x, titratable_group.y, titratable_group.z] - distance, f_angle, nada = AngleFactorX(atom2=BBC_group.get_interaction_atoms(titratable_group)[0], - atom3=BBC_group.atom, - center=center) - if distance < 6.0 and f_angle > 0.001: - value = 1.0-(distance-3.0)/(6.0-3.0) - dpKa += 0.80*min(1.0, value) - - titratable_group.Elocl = dpKa*weight - return + atom2 = bbc_group.get_interaction_atoms(titratable_group)[0] + dist, f_angle, _ = angle_distance_factors(atom2=atom2, + atom3=bbc_group.atom, + center=center) + if dist < UNK_BACKBONE_DISTANCE1 and f_angle > UNK_FANGLE_MIN: + value = 1.0 - (dist-UNK_BACKBONE_DISTANCE2) \ + / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2) + dpka += UNK_PKA_SCALING2*min(1.0, value) + titratable_group.Elocl = dpka*weight -# -# Exception methods -# +def check_exceptions(version, group1, group2): + """Checks for atypical behavior in interactions between two groups. + Checks are made based on group type. -def checkExceptions(version, group1, group2): + TODO - figure out why a similar function exists in version.py + + Args: + version: version object + group1: first group for check + group2: second group for check + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ - checks for exceptions for this version - using defaults - """ - resType1 = group1.type - resType2 = group2.type - - if (resType1 == "COO" and resType2 == "ARG"): - exception, value = checkCooArgException(group1, group2, version) - elif (resType1 == "ARG" and resType2 == "COO"): - exception, value = checkCooArgException(group2, group1, version) - elif (resType1 == "COO" and resType2 == "COO"): - exception, value = checkCooCooException(group1, group2, version) - elif (resType1 == "CYS" and resType2 == "CYS"): - exception, value = checkCysCysException(group1, group2, version) - elif (resType1 == "COO" and resType2 == "HIS") or \ - (resType1 == "HIS" and resType2 == "COO"): - exception, value = checkCooHisException(group1, group2, version) - elif (resType1 == "OCO" and resType2 == "HIS") or \ - (resType1 == "HIS" and resType2 == "OCO"): - exception, value = checkOcoHisException(group1, group2, version) - elif (resType1 == "CYS" and resType2 == "HIS") or \ - (resType1 == "HIS" and resType2 == "CYS"): - exception, value = checkCysHisException(group1, group2, version) + res_type1 = group1.type + res_type2 = group2.type + if (res_type1 == "COO") and (res_type2 == "ARG"): + exception, value = check_coo_arg_exception(group1, group2, version) + elif (res_type1 == "ARG") and (res_type2 == "COO"): + exception, value = check_coo_arg_exception(group2, group1, version) + elif (res_type1 == "COO") and (res_type2 == "COO"): + exception, value = check_coo_coo_exception(group1, group2, version) + elif (res_type1 == "CYS") and (res_type2 == "CYS"): + exception, value = check_cys_cys_exception(group1, group2, version) + elif (res_type1 == "COO") and (res_type2 == "HIS") or \ + (res_type1 == "HIS") and (res_type2 == "COO"): + exception, value = check_coo_his_exception(group1, group2, version) + elif (res_type1 == "OCO") and (res_type2 == "HIS") or \ + (res_type1 == "HIS") and (res_type2 == "OCO"): + exception, value = check_oco_his_exception(group1, group2, version) + elif (res_type1 == "CYS") and (res_type2 == "HIS") or \ + (res_type1 == "HIS") and (res_type2 == "CYS"): + exception, value = check_cys_his_exception(group1, group2, version) else: # do nothing, no exception for this pair - exception = False; value = None - + exception = False + value = None return exception, value +def check_coo_arg_exception(group_coo, group_arg, version): + """Check for COO-ARG interaction atypical behavior. -def checkCooArgException(group_coo, group_arg, version): - """ - checking Coo-Arg exception: uses the two shortes unique distances (involving 2+2 atoms) - """ + Uses the two shortest unique distances (involving 2+2 atoms) - str = "xxx" + Args: + group_coo: COO group + group_arg: ARG group + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ exception = True value_tot = 0.00 - - #dpka_max = parameters.sidechain_interaction.get_value(group_coo.type,group_arg.type) - #cutoff = parameters.sidechain_cutoffs.get_value(group_coo.type,group_arg.type) - # needs to be this way since you want to find shortest distance first - #info("--- exception for %s %s ---" % (group_coo.label, group_arg.label)) atoms_coo = [] atoms_coo.extend(group_coo.get_interaction_atoms(group_arg)) atoms_arg = [] atoms_arg.extend(group_arg.get_interaction_atoms(group_coo)) - - - for iter in ["shortest", "runner-up"]: + for _ in ["shortest", "runner-up"]: # find the closest interaction pair - [closest_coo_atom, distance, closest_arg_atom] = get_smallest_distance(atoms_coo, atoms_arg) - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom,closest_arg_atom) - # calculate and sum up interaction energy + [closest_coo_atom, dist, closest_arg_atom] = get_smallest_distance(atoms_coo, + atoms_arg) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom, + closest_arg_atom) + # calculate and sum up interaction energy f_angle = 1.00 if group_arg.type in version.parameters.angular_dependent_sidechain_interactions: atom3 = closest_arg_atom.bonded_atoms[0] - distance, f_angle, nada = AngleFactorX(closest_coo_atom, closest_arg_atom, atom3) - - value = HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle) - #info(iter, closest_coo_atom, closest_arg_atom,distance,value) + dist, f_angle, _ = angle_distance_factors(closest_coo_atom, + closest_arg_atom, + atom3) + value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) value_tot += value # remove closest atoms before we attemp to find the runner-up pair atoms_coo.remove(closest_coo_atom) atoms_arg.remove(closest_arg_atom) - - return exception, value_tot -def checkCooCooException(group1, group2, version): - """ - checking Coo-Coo hydrogen-bond exception +def check_coo_coo_exception(group1, group2, version): + """Check for COO-COO hydrogen-bond atypical interaction behavior. + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = True - [closest_atom1, distance, closest_atom2] = get_smallest_distance(group1.get_interaction_atoms(group2), - group2.get_interaction_atoms(group1)) - - #dpka_max = parameters.sidechain_interaction.get_value(group1.type,group2.type) - #cutoff = parameters.sidechain_cutoffs.get_value(group1.type,group2.type) - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,closest_atom2) + interact_groups12 = group1.get_interaction_atoms(group2) + interact_groups21 = group2.get_interaction_atoms(group1) + [closest_atom1, dist, closest_atom2] = get_smallest_distance(interact_groups12, + interact_groups21) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, + closest_atom2) f_angle = 1.00 - value = HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle) - weight = calculatePairWeight(version.parameters, group1.Nmass, group2.Nmass) + value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) + weight = calculate_pair_weight(version.parameters, group1.Nmass, group2.Nmass) value = value * (1.0 + weight) - return exception, value +def check_coo_his_exception(group1, group2, version): + """Check for COO-HIS atypical interaction behavior -def checkCooHisException(group1, group2, version): - """ - checking Coo-His exception + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.Nmass, group2.Nmass): exception = True - return exception, version.parameters.COO_HIS_exception -def checkOcoHisException(group1, group2, version): - """ - checking Coo-His exception +def check_oco_his_exception(group1, group2, version): + """Check for OCO-HIS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.Nmass, group2.Nmass): exception = True - return exception, version.parameters.OCO_HIS_exception -def checkCysHisException(group1, group2, version): - """ - checking Cys-His exception +def check_cys_his_exception(group1, group2, version): + """Check for CYS-HIS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.Nmass, group2.Nmass): exception = True - return exception, version.parameters.CYS_HIS_exception -def checkCysCysException(group1, group2, version): - """ - checking Cys-Cys exception +def check_cys_cys_exception(group1, group2, version): + """Check for CYS-CYS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.Nmass, group2.Nmass): exception = True - return exception, version.parameters.CYS_CYS_exception +def check_buried(num_volume1, num_volume2): + """Check to see if an interaction is buried - - -def checkBuried(Nmass1, Nmass2): + Args: + num_volume1: number of buried heavy atoms in volume 1 + num_volume2: number of buried heavy atoms in volume 2 + Returns: + True if interaction is buried, False otherwise """ - returns True if an interaction is buried - """ - - if (Nmass1 + Nmass2 <= 900) and (Nmass1 <= 400 or Nmass2 <= 400): + if (num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX) \ + and (num_volume1 <= SEPARATE_NUM_BURIED_MAX \ + or num_volume2 <= SEPARATE_NUM_BURIED_MAX): return False - else: - return True + return True diff --git a/propka/conformation_container.py b/propka/conformation_container.py index ce1a9c7..9b15ce8 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -182,7 +182,7 @@ class Conformation_container: propka.determinants.setIonDeterminants(self, version) # calculating the back-bone reorganization/desolvation term - version.calculateBackBoneReorganization(self) + version.calculatebackbone_reorganization(self) # setting remaining non-iterative and iterative side-chain & Coulomb interaction determinants propka.determinants.setDeterminants(self.get_sidechain_groups(), version=version, options=options) diff --git a/propka/determinants.py b/propka/determinants.py index 3154875..658905a 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -157,9 +157,9 @@ def setIonDeterminants(conformation_container, version): for ion_group in conformation_container.get_ions(): squared_distance = propka.calculations.squared_distance(titratable_group, ion_group) if squared_distance < version.parameters.coulomb_cutoff2_squared: - weight = version.calculatePairWeight(titratable_group.Nmass, ion_group.Nmass) + weight = version.calculate_pair_weight(titratable_group.Nmass, ion_group.Nmass) # the pKa of both acids and bases are shifted up by negative ions (and vice versa) - value = (-ion_group.charge) * version.calculateCoulombEnergy(math.sqrt(squared_distance), weight) + value = (-ion_group.charge) * version.calculate_coulomb_energy(math.sqrt(squared_distance), weight) newDeterminant = Determinant(ion_group, value) titratable_group.determinants['coulomb'].append(newDeterminant) @@ -206,7 +206,7 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version): if titratable_atom.element == 'H': heavy_atom = titratable_atom.bonded_atoms[0] hydrogen_atom = titratable_atom - [d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=heavy_atom, + [d1, f_angle, d2] = propka.calculations.angle_distance_factors(atom1=heavy_atom, atom2=hydrogen_atom, atom3=backbone_atom) else: @@ -227,7 +227,7 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version): if backbone_atom.element == 'H': backbone_N = backbone_atom.bonded_atoms[0] backbone_H = backbone_atom - [d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=titratable_atom, + [d1, f_angle, d2] = propka.calculations.angle_distance_factors(atom1=titratable_atom, atom2=backbone_H, atom3=backbone_N) else: @@ -238,7 +238,7 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version): if f_angle > 0.001: - value = titratable_group.charge * propka.calculations.HydrogenBondEnergy(distance, dpKa_max, [cutoff1,cutoff2], f_angle) + value = titratable_group.charge * propka.calculations.hydrogen_bond_energy(distance, dpKa_max, [cutoff1,cutoff2], f_angle) newDeterminant = Determinant(backbone_group, value) titratable_group.determinants['backbone'].append(newDeterminant) diff --git a/propka/version.py b/propka/version.py index e09416c..861fc5b 100644 --- a/propka/version.py +++ b/propka/version.py @@ -18,7 +18,7 @@ class version: def calculate_desolvation(self, group): return self.desolvation_model(self.parameters, group) - def calculatePairWeight(self, Nmass1, Nmass2): + def calculate_pair_weight(self, Nmass1, Nmass2): return self.weight_pair_method(self.parameters, Nmass1, Nmass2) # side chains @@ -32,18 +32,18 @@ class version: def electrostatic_interaction(self, group1, group2, distance): return self.electrostatic_interaction_model(group1, group2, distance, self) - def calculateCoulombEnergy(self, distance, weight): + def calculate_coulomb_energy(self, distance, weight): return self.coulomb_interaction_model(distance, weight, self.parameters) - def checkCoulombPair(self, group1, group2, distance): + def check_coulomb_pair(self, group1, group2, distance): return self.check_coulomb_pair_method(self.parameters, group1, group2, distance) # backbone re-organisation - def calculateBackBoneReorganization(self, conformation): + def calculatebackbone_reorganization(self, conformation): return self.backbone_reorganisation_method(self.parameters, conformation) # exceptions - def checkExceptions(self, group1, group2): + def check_exceptions(self, group1, group2): return self.exception_check_method(self, group1, group2) def setup_bonding_and_protonation(self, molecular_container): @@ -66,23 +66,23 @@ class version_A(version): # desolvation related methods self.desolvation_model = calculations.radial_volume_desolvation - self.weight_pair_method = calculations.calculatePairWeight + self.weight_pair_method = calculations.calculate_pair_weight # side chain methods - self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy + self.sidechain_interaction_model = propka.calculations.hydrogen_bond_energy self.hydrogen_bond_interaction_model = propka.calculations.hydrogen_bond_interaction # colomb methods self.electrostatic_interaction_model = propka.calculations.electrostatic_interaction - self.check_coulomb_pair_method = propka.calculations.checkCoulombPair - self.coulomb_interaction_model = propka.calculations.CoulombEnergy + self.check_coulomb_pair_method = propka.calculations.check_coulomb_pair + self.coulomb_interaction_model = propka.calculations.coulomb_energy #backbone - self.backbone_interaction_model = propka.calculations.HydrogenBondEnergy - self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization + self.backbone_interaction_model = propka.calculations.hydrogen_bond_energy + self.backbone_reorganisation_method = propka.calculations.backbone_reorganization # exception methods - self.exception_check_method = propka.calculations.checkExceptions + self.exception_check_method = propka.calculations.check_exceptions return def get_hydrogen_bond_parameters(self, atom1, atom2): @@ -188,20 +188,20 @@ class propka30(version): # desolvation related methods self.desolvation_model = calculations.radial_volume_desolvation - self.weight_pair_method = calculations.calculatePairWeight + self.weight_pair_method = calculations.calculate_pair_weight # side chain methods - self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy + self.sidechain_interaction_model = propka.calculations.hydrogen_bond_energy # colomb methods - self.check_coulomb_pair_method = propka.calculations.checkCoulombPair - self.coulomb_interaction_model = propka.calculations.CoulombEnergy + self.check_coulomb_pair_method = propka.calculations.check_coulomb_pair + self.coulomb_interaction_model = propka.calculations.coulomb_energy #backbone - self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization + self.backbone_reorganisation_method = propka.calculations.backbone_reorganization # exception methods - self.exception_check_method = propka.calculations.checkExceptions + self.exception_check_method = propka.calculations.check_exceptions return From 5afc5d511aecc15678fb00dc0fa77f60923d7125 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 15:51:47 -0700 Subject: [PATCH 08/65] Removed missed comment. --- propka/bonds.py | 1 - 1 file changed, 1 deletion(-) diff --git a/propka/bonds.py b/propka/bonds.py index 189f9cb..367c6d1 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -136,7 +136,6 @@ class BondMaker: if atom2.name == 'C': self.make_bond(atom1, atom2) - # TODO - stopped here. def connect_backbone(self, residue1, residue2): """Sets up bonds in the backbone From 83c54ec153fb1c51d99ad560f564ae6ac58e24ff Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 17:22:18 -0700 Subject: [PATCH 09/65] De-lint conformation_container.py. Some public methods/members were changed. These were checked against Google for obvious use in other packages. --- propka/calculations.py | 2 +- propka/conformation_container.py | 600 +++++++++++++++++++------------ propka/coupled_groups.py | 4 +- propka/group.py | 18 +- propka/molecular_container.py | 6 +- propka/pdb.py | 6 +- 6 files changed, 387 insertions(+), 249 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index acfecbd..ea2a54b 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -680,7 +680,7 @@ def backbone_reorganization(parameters, conformation): conformation: specific molecule conformation """ titratable_groups = conformation.get_backbone_reorganisation_groups() - bbc_groups = conformation.get_backbone_CO_groups() + bbc_groups = conformation.get_backbone_co_groups() for titratable_group in titratable_groups: weight = titratable_group.buried diff --git a/propka/conformation_container.py b/propka/conformation_container.py index 9b15ce8..a205648 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -1,451 +1,589 @@ -# -# Container for molecular conformations -# +"""Container for molecular conformations""" +import functools +import propka.ligand +from propka.output import make_interaction_map +from propka.determinant import Determinant +from propka.coupled_groups import nccg +from propka.determinants import setBackBoneDeterminants, setIonDeterminants +from propka.determinants import setDeterminants +from propka.group import Group, is_group +from propka.lib import info -from __future__ import division -from __future__ import print_function -import propka.group, propka.determinants, propka.determinant, propka.ligand, propka.output, propka.coupled_groups, functools -from propka.lib import info, warning +# A large number that gets multipled with the integer obtained from applying +# ord() to the atom chain ID. Used in calculating atom keys for sorting. +UNICODE_MULTIPLIER = 1e7 +# A number that gets mutiplied with an atom's residue number. Used in +# calculating keys for atom sorting. +RESIDUE_MULTIPLIER = 1000 + + +class ConformationContainer: + """Container for molecular conformations""" -class Conformation_container: def __init__(self, name='', parameters=None, molecular_container=None): + """Initialize conformation container. + + Args: + name: name for conformation + parameters: parmameters for conformation + molecular_container: container for molecule + """ self.molecular_container = molecular_container - self.name=name - self.parameters=parameters + self.name = name + self.parameters = parameters self.atoms = [] self.groups = [] self.chains = [] self.current_iter_item = 0 - + # TODO - what is marvin_pkas_calculated? self.marvin_pkas_calculated = False self.non_covalently_coupled_groups = False - return - - - # - # Group related methods - # def extract_groups(self): - """ Generates at list of molecular groups needed for calculating pKa values """ + """Generate molecular groups needed for calculating pKa values.""" for atom in self.get_non_hydrogen_atoms(): # has this atom been checked for groups? if atom.groups_extracted == 0: - group = propka.group.is_group(self.parameters, atom) + group = is_group(self.parameters, atom) else: group = atom.group - # if the atom has been checked in a another conformation, check if it has a - # group that should be used in this conformation as well + # if the atom has been checked in a another conformation, check + # if it has a group that should be used in this conformation as well if group: self.setup_and_add_group(group) - return - def additional_setup_when_reading_input_file(self): - - # if a group is coupled and we are reading a .propka_input file, + """Generate interaction map and charge centers.""" + # if a group is coupled and we are reading a .propka_input file, then # some more configuration might be needed - - # print coupling map - map = propka.output.make_interaction_map('Covalent coupling map for %s'%self, - self.get_covalently_coupled_groups(), - lambda g1,g2: g1 in g2.covalently_coupled_groups) - info(map) - + map_ = make_interaction_map('Covalent coupling map for %s' % self, + self.get_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.covalently_coupled_groups) + info(map_) # check if we should set a common charge centre as well if self.parameters.common_charge_centre: self.set_common_charge_centres() - return - def set_common_charge_centres(self): - for system in self.get_coupled_systems(self.get_covalently_coupled_groups(), propka.group.Group.get_covalently_coupled_groups): + """Assign charge centers to groups.""" + for system in self.get_coupled_systems(self.get_covalently_coupled_groups(), + Group.get_covalently_coupled_groups): # make a list of the charge centre coordinates all_coordinates = list(map(lambda g: [g.x, g.y, g.z], system)) # find the common charge center - ccc = functools.reduce(lambda g1,g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]], all_coordinates) + ccc = functools.reduce(lambda g1, g2: [g1[0]+g2[0], g1[1]+g2[1], + g1[2]+g2[2]], + all_coordinates) ccc = list(map(lambda c: c/len(system), ccc)) # set the ccc for all coupled groups in this system - for g in system: - [g.x, g.y, g.z] = ccc - g.common_charge_centre = True - return - - - + for group in system: + [group.x, group.y, group.z] = ccc + group.common_charge_centre = True def find_covalently_coupled_groups(self): - """ Finds covalently coupled groups and sets common charge centres if needed """ + """Find covalently coupled groups and set common charge centres.""" for group in self.get_titratable_groups(): # Find covalently bonded groups - bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, group.atom) - - # couple groups - for cg in bonded_groups: - if cg in group.covalently_coupled_groups: + bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, + group.atom) + # coupled groups + for bond_group in bonded_groups: + if bond_group in group.covalently_coupled_groups: continue - if cg.atom.sybyl_type == group.atom.sybyl_type: - group.couple_covalently(cg) - + if bond_group.atom.sybyl_type == group.atom.sybyl_type: + group.couple_covalently(bond_group) # check if we should set a common charge centre as well if self.parameters.common_charge_centre: self.set_common_charge_centres() - # print coupling map - map = propka.output.make_interaction_map('Covalent coupling map for %s'%self, - #self.get_titratable_groups(), - self.get_covalently_coupled_groups(), - lambda g1,g2: g1 in g2.covalently_coupled_groups) - info(map) - - - return - + map_ = make_interaction_map('Covalent coupling map for %s' % self, + self.get_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.covalently_coupled_groups) + info(map_) def find_non_covalently_coupled_groups(self, verbose=False): + """Find non-covalently coupled groups and set common charge centres. + + Args: + verbose: verbose output + """ # check if non-covalent coupling has already been set up in an input file - if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: + if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0, + self.get_titratable_groups()))) > 0: self.non_covalently_coupled_groups = True - - propka.coupled_groups.nccg.identify_non_covalently_coupled_groups(self,verbose=verbose) - + nccg.identify_non_covalently_coupled_groups(self, verbose=verbose) # re-do the check - if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: + if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0, + self.get_titratable_groups()))) > 0: self.non_covalently_coupled_groups = True - return + def find_bonded_titratable_groups(self, atom, num_bonds, original_atom): + """Find bonded titrable groups. - - def find_bonded_titratable_groups(self, atom, no_bonds, original_atom): + Args: + atom: atom to check for bonds + num_bonds: number of bonds for coupling + original_atom: another atom to check for bonds + Returns: + a set of bonded atom groups + """ res = set() - for ba in atom.bonded_atoms: + for bond_atom in atom.bonded_atoms: # skip the original atom - if ba == original_atom: + if bond_atom == original_atom: continue # check if this atom has a titratable group - if ba.group and ba.group.titratable and no_bonds <= self.parameters.coupling_max_number_of_bonds: - res.add(ba.group) + if bond_atom.group and bond_atom.group.titratable \ + and num_bonds <= self.parameters.coupling_max_number_of_bonds: + res.add(bond_atom.group) # check for titratable groups bonded to this atom - if no_bonds < self.parameters.coupling_max_number_of_bonds: - res |= self.find_bonded_titratable_groups(ba,no_bonds+1, original_atom) - + if num_bonds < self.parameters.coupling_max_number_of_bonds: + res |= self.find_bonded_titratable_groups(bond_atom, + num_bonds+1, + original_atom) return res - def setup_and_add_group(self, group): - """ Checks if we want to include this group in the calculations """ + """Check if we want to include this group in the calculations. + Args: + group: group to check + """ # Is it recognized as a group at all? if not group: return - # Other checks (include ligands, which chains etc.) - # if all ok, accept the group self.init_group(group) self.groups.append(group) def init_group(self, group): - """ - Initialize the given Group object. + """Initialize the given Group object. + + Args: + group: group object to initialize """ # set up the group - group.parameters=self.parameters + group.parameters = self.parameters group.setup() - # If --titrate_only option is set, make non-specified residues un-titratable: + # If --titrate_only option is set, make non-specified residues + # un-titratable: titrate_only = self.molecular_container.options.titrate_only if titrate_only is not None: - at = group.atom - if not (at.chain_id, at.res_num, at.icode) in titrate_only: + atom = group.atom + if not (atom.chain_id, atom.res_num, atom.icode) in titrate_only: group.titratable = False if group.residue_type == 'CYS': group.exclude_cys_from_results = True - - # - # pka calculation methods - # - def calculate_pka(self, version, options): + """Calculate pKas for conformation container. + + Args: + version: version object + options: option object + """ info('\nCalculating pKas for', self) - # calculate desolvation - for group in self.get_titratable_groups()+self.get_ions(): + for group in self.get_titratable_groups() + self.get_ions(): version.calculate_desolvation(group) - # calculate backbone interactions - propka.determinants.setBackBoneDeterminants(self.get_titratable_groups(), self.get_backbone_groups(), version) - + setBackBoneDeterminants(self.get_titratable_groups(), + self.get_backbone_groups(), version) # setting ion determinants - propka.determinants.setIonDeterminants(self, version) - + setIonDeterminants(self, version) # calculating the back-bone reorganization/desolvation term version.calculatebackbone_reorganization(self) - - # setting remaining non-iterative and iterative side-chain & Coulomb interaction determinants - propka.determinants.setDeterminants(self.get_sidechain_groups(), version=version, options=options) - + # setting remaining non-iterative and iterative side-chain & Coulomb + # interaction determinants + setDeterminants(self.get_sidechain_groups(), version=version, + options=options) # calculating the total pKa values - for group in self.groups: group.calculate_total_pka() - + for group in self.groups: + group.calculate_total_pka() # take coupling effects into account penalised_labels = self.coupling_effects() - - if self.parameters.remove_penalised_group and len(penalised_labels)>0: + if self.parameters.remove_penalised_group and len(penalised_labels) > 0: info('Removing penalised groups!!!') - - for g in self.get_titratable_groups(): - g.remove_determinants(penalised_labels) - + for group in self.get_titratable_groups(): + group.remove_determinants(penalised_labels) # re-calculating the total pKa values - for group in self.groups: group.calculate_total_pka() - - - return - + for group in self.groups: + group.calculate_total_pka() def coupling_effects(self): - # - # Bases: The group with the highest pKa (the most stable one in the - # charged form) will be the first one to adopt a proton as pH - # is lowered and this group is allowed to titrate. - # The remaining groups are penalised - # - # Acids: The group with the highest pKa (the least stable one in the - # charged form) will be the last group to loose the proton as - # pH is raised and will be penalised. - # The remaining groups are allowed to titrate. - # + """Penalize groups based on coupling effects. + + Bases: The group with the highest pKa (the most stable one in the + charged form) will be the first one to adopt a proton as pH is lowered + and this group is allowed to titrate. The remaining groups are + penalised. + + Acids: The group with the highest pKa (the least stable one in the + charged form) will be the last group to loose the proton as pH is + raised and will be penalised. The remaining groups are allowed to + titrate. + """ penalised_labels = [] - for all_groups in self.get_coupled_systems(self.get_covalently_coupled_groups(), - propka.group.Group.get_covalently_coupled_groups): - + Group.get_covalently_coupled_groups): # check if we should share determinants if self.parameters.shared_determinants: self.share_determinants(all_groups) - # find the group that has the highest pKa value - first_group = max(all_groups, key=lambda g:g.pka_value) - + first_group = max(all_groups, key=lambda g: g.pka_value) # In case of acids if first_group.charge < 0: - first_group.coupled_titrating_group = min(all_groups, key=lambda g:g.pka_value) + first_group.coupled_titrating_group = min(all_groups, key=lambda g: g.pka_value) penalised_labels.append(first_group.label) # group with the highest pKa is penalised - # In case of bases else: - for a in all_groups: - if a == first_group: + for group in all_groups: + if group == first_group: continue # group with the highest pKa is allowed to titrate... - a.coupled_titrating_group = first_group - penalised_labels.append(a.label) #... and the rest is penalised - + group.coupled_titrating_group = first_group + penalised_labels.append(group.label) #... and the rest is penalised return penalised_labels + @staticmethod + def share_determinants(groups): + """Share sidechain, backbone, and Coloumb determinants between groups. - def share_determinants(self, groups): - + Args: + groups: groups to share between + """ # make a list of the determinants to share - types = ['sidechain','backbone','coulomb'] - for type in types: + types = ['sidechain', 'backbone', 'coulomb'] + for type_ in types: # find maximum value for each determinant max_dets = {} - for g in groups: - for d in g.determinants[type]: + for group in groups: + for det in group.determinants[type_]: # update max dets - if d.group not in max_dets.keys(): - max_dets[d.group] = d.value + if det.group not in max_dets.keys(): + max_dets[det.group] = det.value else: - max_dets[d.group] = max(d.value, max_dets[d.group], key= lambda v: abs(v)) - + max_dets[det.group] = max(det.value, + max_dets[det.group], + key=lambda v: abs(v)) # overwrite/add maximum value for each determinant - for det_group in max_dets.keys(): - new_determinant = propka.determinant.Determinant(det_group, max_dets[det_group]) - for g in groups: - g.set_determinant(new_determinant,type) - - - return - + for det_group in max_dets: + new_determinant = Determinant(det_group, max_dets[det_group]) + for group in groups: + group.set_determinant(new_determinant, type_) def get_coupled_systems(self, groups, get_coupled_groups): - """ This generator will yield one covalently coupled system at the time """ + """A generator that yields covalently coupled systems. + + Args: + groups: groups for generating coupled systems + get_coupled_groups: TODO - I don't know what this is + Yields: + covalently coupled systems + """ groups = set(groups) - while len(groups)>0: + while len(groups) > 0: # extract a system of coupled groups ... system = set() self.get_a_coupled_system_of_groups(groups.pop(), system, get_coupled_groups) # ... and remove them from the list groups -= system - yield system - return + def get_a_coupled_system_of_groups(self, new_group, coupled_groups, + get_coupled_groups): + """Set up coupled systems of groups. - - def get_a_coupled_system_of_groups(self, new_group, coupled_groups, get_coupled_groups): + Args: + new_group: added to coupled_groups + coupled_groups: existing coupled groups + get_coupled_groups: TODO - I don't know what this + """ coupled_groups.add(new_group) - [self.get_a_coupled_system_of_groups(c, coupled_groups, get_coupled_groups) for c in get_coupled_groups(new_group) if c not in coupled_groups] - return + for coupled_group in get_coupled_groups(new_group): + if coupled_group not in coupled_groups: + self.get_a_coupled_system_of_groups(coupled_group, + coupled_groups, + get_coupled_groups) + def calculate_folding_energy(self, ph=None, reference=None): + """Calculate folding energy over all groups in conformation container. - # - # Energy/summary-related methods - # - def calculate_folding_energy(self, pH=None, reference=None): + Args: + ph: pH for calculation + reference: reference state + Returns: + folding energy + TODO - need units + """ ddg = 0.0 for group in self.groups: - #info('Folding energy for %s at pH %f: %f'%(group,pH,group.calculate_folding_energy(self.parameters, pH=pH, reference=reference))) - ddg += group.calculate_folding_energy(self.parameters, pH=pH, reference=reference) - + ddg += group.calculate_folding_energy(self.parameters, ph=ph, + reference=reference) return ddg - def calculate_charge(self, parmaeters, pH=None): + def calculate_charge(self, parameters, ph=None): + """Calculate charge for folded and unfolded states. + + Args: + parameters: parameters for calculation + ph: pH for calculation + Returns: + 1. charge for unfolded state + 2. charge for folded state + """ unfolded = folded = 0.0 for group in self.get_titratable_groups(): - unfolded += group.calculate_charge(parmaeters, pH=pH, state='unfolded') - folded += group.calculate_charge(parmaeters, pH=pH, state='folded') - - return unfolded,folded - - - # - # conformation/bookkeeping/atom methods - # + unfolded += group.calculate_charge(parameters, ph=ph, + state='unfolded') + folded += group.calculate_charge(parameters, ph=ph, + state='folded') + return unfolded, folded def get_backbone_groups(self): - """ returns all backbone groups needed for the pKa calculations """ + """Get backbone groups needed for the pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if 'BB' in group.type] def get_sidechain_groups(self): - """ returns all sidechain groups needed for the pKa calculations """ - return [group for group in self.groups if ('BB' not in group.type\ - and not group.atom.cysteine_bridge)] + """Get sidechain groups needed for the pKa calculations. + + Returns: + list of groups + """ + return [group for group in self.groups \ + if ('BB' not in group.type and not group.atom.cysteine_bridge)] def get_covalently_coupled_groups(self): - return [g for g in self.groups if len(g.covalently_coupled_groups)>0] + """Get covalently coupled groups needed for pKa calculations. + + Returns: + list of groups + """ + return [g for g in self.groups \ + if len(g.covalently_coupled_groups) > 0] def get_non_covalently_coupled_groups(self): - return [g for g in self.groups if len(g.non_covalently_coupled_groups)>0] + """Get non-covalently coupled groups needed for pKa calculations. - def get_backbone_NH_groups(self): - """ returns all NH backbone groups needed for the pKa calculations """ + Returns: + list of groups + """ + return [g for g in self.groups \ + if len(g.non_covalently_coupled_groups) > 0] + + def get_backbone_nh_groups(self): + """Get NH backbone groups needed for pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if group.type == 'BBN'] - def get_backbone_CO_groups(self): - """ returns all CO backbone groups needed for the pKa calculations """ + def get_backbone_co_groups(self): + """Get CO backbone groups needed for pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if group.type == 'BBC'] def get_groups_in_residue(self, residue): + """Get residue groups needed for pKa calculations. + + Args: + residue: specific residue with groups + Returns: + list of groups + """ return [group for group in self.groups if group.residue_type == residue] def get_titratable_groups(self): + """Get all titratable groups needed for pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if group.titratable] def get_groups_for_calculations(self): - """ - Returns a list of groups that should be included in results report. + """Get a list of groups that should be included in results report. + If --titrate_only option is specified, only residues that are titratable and are in that list are included; otherwise all titratable residues and CYS residues are included. + + Returns: + list of groups """ return [group for group in self.groups if group.use_in_calculations()] def get_acids(self): - return [group for group in self.groups if (group.residue_type in self.parameters.acid_list - and not group.atom.cysteine_bridge)] + """Get acid groups needed for pKa calculations. + + Returns: + list of groups + """ + return [group for group in self.groups \ + if (group.residue_type in self.parameters.acid_list \ + and not group.atom.cysteine_bridge)] def get_backbone_reorganisation_groups(self): - return [group for group in self.groups if (group.residue_type in self.parameters.backbone_reorganisation_list - and not group.atom.cysteine_bridge)] + """Get groups involved with backbone reorganization. + + Returns: + list of groups + """ + return [group for group in self.groups \ + if (group.residue_type in self.parameters.backbone_reorganisation_list \ + and not group.atom.cysteine_bridge)] def get_ions(self): - return [group for group in self.groups if group.residue_type in self.parameters.ions.keys()] + """Get ion groups. - def get_group_names(self, list): - return [group for group in self.groups if group.type in list] + Returns: + list of groups + """ + return [group for group in self.groups \ + if group.residue_type in self.parameters.ions.keys()] + def get_group_names(self, group_list): + """Get names of groups in list. + + Args: + group_list: list to check + Returns: + list of groups + """ + return [group for group in self.groups if group.type in group_list] def get_ligand_atoms(self): - return [atom for atom in self.atoms if atom.type=='hetatm'] + """Get atoms associated with ligands. + + Returns: + list of atoms + """ + return [atom for atom in self.atoms if atom.type == 'hetatm'] def get_heavy_ligand_atoms(self): - return [atom for atom in self.atoms if atom.type=='hetatm' and atom.element != 'H'] + """Get heavy atoms associated with ligands. - def get_chain(self,chain): + Returns: + list of atoms + """ + return [atom for atom in self.atoms \ + if atom.type == 'hetatm' and atom.element != 'H'] + + def get_chain(self, chain): + """Get atoms associated with a specific chain. + + Args: + chain: chain to select + Returns: + list of atoms + """ return [atom for atom in self.atoms if atom.chain_id != chain] - def add_atom(self, atom): - #info(self,'adding',atom) + """Add atom to container. + + Args: + atom: atom to add + """ self.atoms.append(atom) if not atom.conformation_container: atom.conformation_container = self if not atom.molecular_container: atom.molecular_container = self.molecular_container - # store chain id for bookkeeping if not atom.chain_id in self.chains: self.chains.append(atom.chain_id) - return - def copy_atom(self, atom): - new_atom = atom.make_copy() + """Add a copy of the atom to container. + + Args: + atom: atom to copy and add + """ + new_atom = atom.make_copy() self.atoms.append(new_atom) new_atom.conformation_container = self - return - def get_non_hydrogen_atoms(self): - return [atom for atom in self.atoms if atom.element!='H'] + """Get atoms that are not hydrogens. + Returns: + list of atoms + """ + return [atom for atom in self.atoms if atom.element != 'H'] def top_up(self, other): - """ Tops up self with all atoms found in other but not in self """ - my_residue_labels = { a.residue_label for a in self.atoms } + """Adds any atoms found in `other` but not in this container. + + Tops up self with all atoms found in other but not in self. + + Args: + other: conformation container with atoms to add + """ + my_residue_labels = {a.residue_label for a in self.atoms} for atom in other.atoms: if not atom.residue_label in my_residue_labels: self.copy_atom(atom) - return def find_group(self, group): - for g in self.groups: - if g.atom.residue_label == group.atom.residue_label: - if g.type == group.type: - return g + """Find a group in the container. + + Args: + group: group to find + Returns: + False (if group not found) or group + """ + for group_ in self.groups: + if group_.atom.residue_label == group.atom.residue_label: + if group_.type == group.type: + return group_ return False - def set_ligand_atom_names(self): + """Set names for atoms in ligands.""" for atom in self.get_ligand_atoms(): propka.ligand.assign_sybyl_type(atom) - return - - def __str__(self): - return'Conformation container %s with %d atoms and %d groups'%(self.name,len(self),len(self.groups)) + """String that lists statistics of atoms and groups.""" + str_ = 'Conformation container %s with %d atoms and %d groups' % (self.name, + len(self), + len(self.groups)) + return str_ def __len__(self): + """Number of atoms in container.""" return len(self.atoms) def sort_atoms(self): + """Sort atoms by `self.sort_atoms_key()` and renumber.""" # sort the atoms ... self.atoms.sort(key=self.sort_atoms_key) # ... and re-number them for i in range(len(self.atoms)): self.atoms[i].numb = i+1 - return + @staticmethod + def sort_atoms_key(atom): + """Generate key for atom sorting. - def sort_atoms_key(self, atom): - key = ord(atom.chain_id)*1e7 - key += atom.res_num*1000 + Args: + atom: atom for key generation. + Returns: + key for atom + """ + key = ord(atom.chain_id) * UNICODE_MULTIPLIER + key += atom.res_num * RESIDUE_MULTIPLIER if len(atom.name) > len(atom.element): key += ord(atom.name[len(atom.element)]) - #info(atom,ord(atom.name[len(atom.element)]), '|%s||%s|'%(atom.name,atom.element)) return key diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index 3d0ac2d..6b40aee 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -38,7 +38,7 @@ class non_covalently_couple_groups: if self.parameters.pH == 'variable': use_pH = min(group1.pka_value, group2.pka_value) - default_energy = energy_method(pH=use_pH, reference=self.parameters.reference) + default_energy = energy_method(ph=use_pH, reference=self.parameters.reference) default_pka1 = group1.pka_value default_pka2 = group2.pka_value @@ -54,7 +54,7 @@ class non_covalently_couple_groups: group2.calculate_total_pka() # store swapped energy and pka's - swapped_energy = energy_method(pH=use_pH, reference=self.parameters.reference) + swapped_energy = energy_method(ph=use_pH, reference=self.parameters.reference) swapped_pka1 = group1.pka_value swapped_pka2 = group2.pka_value diff --git a/propka/group.py b/propka/group.py index 14cc5ec..c524dac 100644 --- a/propka/group.py +++ b/propka/group.py @@ -557,11 +557,11 @@ class Group: # Energy-related methods # - def calculate_folding_energy(self, parameters, pH=None, reference=None): + def calculate_folding_energy(self, parameters, ph=None, reference=None): """ returning the electrostatic energy of this residue at pH 'pH' """ - if pH == None: + if ph == None: pH = parameters.pH if reference == None: reference = parameters.reference @@ -582,12 +582,12 @@ class Group: # calculating the ddG(low-pH --> pH) contribution # folded - x = pH - self.pka_value + x = ph - self.pka_value y = 10**x Q_pro = math.log10(1+y) # unfolded - x = pH - self.model_pka + x = ph - self.model_pka y = 10**x Q_mod = math.log10(1+y) @@ -596,12 +596,12 @@ class Group: return ddG - def calculate_charge(self, parmaeters, pH=7.0, state='folded'): + def calculate_charge(self, parmaeters, ph=7.0, state='folded'): if state == "unfolded": - x = self.charge * (self.model_pka - pH) + x = self.charge * (self.model_pka - ph) else: - x = self.charge * (self.pka_value - pH) + x = self.charge * (self.pka_value - ph) y = 10**x charge = self.charge*(y/(1.0+y)) @@ -1319,8 +1319,8 @@ def is_ligand_group_by_marvin_pkas(parameters, atom): if not atom.conformation_container.marvin_pkas_calculated: lpka = propka.ligand_pka_values.ligand_pka_values(parameters) lpka.get_marvin_pkas_for_molecular_container(atom.molecular_container, - min_pH=parameters.min_ligand_model_pka, - max_pH=parameters.max_ligand_model_pka) + min_ph=parameters.min_ligand_model_pka, + max_ph=parameters.max_ligand_model_pka) if atom.marvin_pka: diff --git a/propka/molecular_container.py b/propka/molecular_container.py index d2d59cb..74bf566 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -136,7 +136,7 @@ class Molecular_container: def average_of_conformations(self): # make a new configuration to hold the average values - avr_conformation = propka.conformation_container.Conformation_container(name='average', + avr_conformation = propka.conformation_container.ConformationContainer(name='average', parameters=self.conformations[self.conformation_names[0]].parameters, molecular_container=self) @@ -192,7 +192,7 @@ class Molecular_container: # calculate stability profile profile = [] for ph in propka.lib.make_grid(*grid): - ddg = self.conformations[conformation].calculate_folding_energy( pH=ph, reference=reference) + ddg = self.conformations[conformation].calculate_folding_energy( ph=ph, reference=reference) #info(ph,ddg) profile.append([ph, ddg]) @@ -220,7 +220,7 @@ class Molecular_container: def getChargeProfile(self, conformation='AVR', grid=[0., 14., .1]): charge_profile = [] for ph in propka.lib.make_grid(*grid): - q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, pH=ph) + q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, ph=ph) charge_profile.append([ph, q_unfolded, q_folded]) return charge_profile diff --git a/propka/pdb.py b/propka/pdb.py index 0b01588..a362189 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -8,7 +8,7 @@ import propka.lib from propka.lib import info, warning from propka.atom import Atom -from propka.conformation_container import Conformation_container +from propka.conformation_container import ConformationContainer expected_atom_numbers = {'ALA':5, 'ARG':11, @@ -39,7 +39,7 @@ def read_pdb(pdb_file, parameters, molecule): lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule) conformations[name].add_atom(atom) # make a sorted list of conformation names @@ -260,7 +260,7 @@ def read_input(input_file, parameters,molecule): lines = get_atom_lines_from_input(input_file) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule) conformations[name].add_atom(atom) # make a sorted list of conformation names From c985e0271338fab09b0296ef4d2c7ee38c31f8a0 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 23 May 2020 17:57:12 -0700 Subject: [PATCH 10/65] De-lint coupled_groups.py Some methods/attributes were changed but were checked in Google to make sure other codes were not affected. --- propka/conformation_container.py | 4 +- propka/coupled_groups.py | 385 +++++++++++++++++-------------- 2 files changed, 216 insertions(+), 173 deletions(-) diff --git a/propka/conformation_container.py b/propka/conformation_container.py index a205648..7b2f370 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -3,7 +3,7 @@ import functools import propka.ligand from propka.output import make_interaction_map from propka.determinant import Determinant -from propka.coupled_groups import nccg +from propka.coupled_groups import NCCG from propka.determinants import setBackBoneDeterminants, setIonDeterminants from propka.determinants import setDeterminants from propka.group import Group, is_group @@ -112,7 +112,7 @@ class ConformationContainer: if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0, self.get_titratable_groups()))) > 0: self.non_covalently_coupled_groups = True - nccg.identify_non_covalently_coupled_groups(self, verbose=verbose) + NCCG.identify_non_covalently_coupled_groups(self, verbose=verbose) # re-do the check if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0, self.get_titratable_groups()))) > 0: diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index 6b40aee..49777f8 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -1,113 +1,113 @@ - -from __future__ import division -from __future__ import print_function - -import math, propka.output, propka.group, propka.lib, itertools -from propka.lib import info, warning +"""Describe coupling between groups.""" +import itertools +import propka.lib +from propka.group import Group +from propka.output import make_interaction_map +from propka.lib import info -class non_covalently_couple_groups: +class NonCovalentlyCoupledGroups: + """Groups that are coupled without covalent bonding.""" def __init__(self): - self.parameters = None - - # self.do_intrinsic = False - # self.do_pair_wise = False self.do_prot_stat = True - return - - - # - # Methods for finding coupled groups - # - def is_coupled_protonation_state_probability(self, group1, group2, energy_method, return_on_fail=True): + def is_coupled_protonation_state_probability(self, group1, group2, + energy_method, + return_on_fail=True): + """Check whether two groups are energetically coupled. + Args: + group1: first group for interaction + group2: second group for interaction + energy_method: function for calculating energy + return_on_fail: return if part of the calculation fails + Returns: + dictionary describing coupling + """ # check if the interaction energy is high enough - interaction_energy = max(self.get_interaction(group1,group2), self.get_interaction(group2,group1)) - - if interaction_energy<=self.parameters.min_interaction_energy and return_on_fail: - return {'coupling_factor':-1.0} - + interaction_energy = max(self.get_interaction(group1, group2), + self.get_interaction(group2, group1)) + if interaction_energy <= self.parameters.min_interaction_energy \ + and return_on_fail: + return {'coupling_factor': -1.0} # calculate intrinsic pKa's, if not already done for group in [group1, group2]: if not hasattr(group, 'intrinsic_pKa'): group.calculate_intrinsic_pka() - - use_pH = self.parameters.pH + use_ph = self.parameters.pH if self.parameters.pH == 'variable': - use_pH = min(group1.pka_value, group2.pka_value) - - default_energy = energy_method(ph=use_pH, reference=self.parameters.reference) + use_ph = min(group1.pka_value, group2.pka_value) + default_energy = energy_method(ph=use_ph, + reference=self.parameters.reference) default_pka1 = group1.pka_value default_pka2 = group2.pka_value - # check that pka values are within relevant limits if max(default_pka1, default_pka2) < self.parameters.min_pka or \ min(default_pka1, default_pka2) > self.parameters.max_pka: if return_on_fail: - return {'coupling_factor':-1.0} - + return {'coupling_factor': -1.0} # Swap interactions and re-calculate pKa values self.swap_interactions([group1], [group2]) group1.calculate_total_pka() group2.calculate_total_pka() - # store swapped energy and pka's - swapped_energy = energy_method(ph=use_pH, reference=self.parameters.reference) + swapped_energy = energy_method(ph=use_ph, reference=self.parameters.reference) swapped_pka1 = group1.pka_value swapped_pka2 = group2.pka_value - pka_shift1 = swapped_pka1 - default_pka1 pka_shift2 = swapped_pka2 - default_pka2 - # Swap back to original protonation state self.swap_interactions([group1], [group2]) group1.calculate_total_pka() group2.calculate_total_pka() - # check difference in free energy - if abs(default_energy - swapped_energy) > self.parameters.max_free_energy_diff and return_on_fail: - return {'coupling_factor':-1.0} - + if abs(default_energy - swapped_energy) > self.parameters.max_free_energy_diff \ + and return_on_fail: + return {'coupling_factor': -1.0} # check pka shift - if max(abs(pka_shift1), abs(pka_shift2)) < self.parameters.min_swap_pka_shift and return_on_fail: - return {'coupling_factor':-1.0} - + if max(abs(pka_shift1), abs(pka_shift2)) < self.parameters.min_swap_pka_shift \ + and return_on_fail: + return {'coupling_factor': -1.0} # check intrinsic pka diff - if abs(group1.intrinsic_pKa - group2.intrinsic_pKa) > self.parameters.max_intrinsic_pKa_diff and return_on_fail: - return {'coupling_factor':-1.0} - + if abs(group1.intrinsic_pKa - group2.intrinsic_pKa) \ + > self.parameters.max_intrinsic_pKa_diff and return_on_fail: + return {'coupling_factor': -1.0} # if everything is OK, calculate the coupling factor and return all info - factor = self.get_free_energy_diff_factor(default_energy, swapped_energy)*\ - self.get_pka_diff_factor(group1.intrinsic_pKa, group2.intrinsic_pKa)*\ - self.get_interaction_factor(interaction_energy) - - return {'coupling_factor':factor, - 'default_energy':default_energy, - 'swapped_energy':swapped_energy, - 'interaction_energy':interaction_energy, - 'swapped_pka1':swapped_pka1, - 'swapped_pka2':swapped_pka2, - 'pka_shift1':pka_shift1, - 'pka_shift2':pka_shift2, - 'pH':use_pH} - - - - # - # Methods for calculating the coupling factor - # + factor = self.get_free_energy_diff_factor(default_energy, swapped_energy) \ + * self.get_pka_diff_factor(group1.intrinsic_pKa, group2.intrinsic_pKa) \ + * self.get_interaction_factor(interaction_energy) + return {'coupling_factor': factor, 'default_energy': default_energy, + 'swapped_energy': swapped_energy, + 'interaction_energy': interaction_energy, + 'swapped_pka1': swapped_pka1, 'swapped_pka2': swapped_pka2, + 'pka_shift1': pka_shift1, 'pka_shift2': pka_shift2, + 'pH': use_ph} def get_pka_diff_factor(self, pka1, pka2): + """Get scaling factor for difference between intrinsic pKa values. + + Args: + pka1: first pKa to compare + pka2: second pKa to compare + Returns: + float value of scaling factor + """ intrinsic_pka_diff = abs(pka1-pka2) res = 0.0 if intrinsic_pka_diff <= self.parameters.max_intrinsic_pKa_diff: res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pKa_diff)**2 - return res def get_free_energy_diff_factor(self, energy1, energy2): + """Get scaling factor for difference between free energies. + + Args: + energy1: first energy to compare + energy2: second energy to compare + Returns: + float value of scaling factor + """ free_energy_diff = abs(energy1-energy2) res = 0.0 if free_energy_diff <= self.parameters.max_free_energy_diff: @@ -115,19 +115,28 @@ class non_covalently_couple_groups: return res def get_interaction_factor(self, interaction_energy): + """Get scaling factor related to interaction energy. + + Args: + interaction_energy: interaction energy + Returns: + float value of scaling factor + """ res = 0.0 interaction_energy = abs(interaction_energy) if interaction_energy >= self.parameters.min_interaction_energy: - res = (interaction_energy-self.parameters.min_interaction_energy)/(1.0+interaction_energy-self.parameters.min_interaction_energy) - + res = (interaction_energy-self.parameters.min_interaction_energy) \ + / (1.0+interaction_energy-self.parameters.min_interaction_energy) return res + def identify_non_covalently_coupled_groups(self, conformation, + verbose=True): + """Find coupled residues in protein. - - - def identify_non_covalently_coupled_groups(self, conformation, verbose=True): - """ Finds coupled residues in protein """ - + Args: + conformation: protein conformation to test + verbose: verbose output (boolean) + """ self.parameters = conformation.parameters if verbose: info('') @@ -137,186 +146,220 @@ class non_covalently_couple_groups: info('-' * 103) info(' Detecting non-covalently coupled residues') info('-' * 103) - info(' Maximum pKa difference: %4.2f pKa units' % self.parameters.max_intrinsic_pKa_diff) - info(' Minimum interaction energy: %4.2f pKa units' % self.parameters.min_interaction_energy) - info(' Maximum free energy diff.: %4.2f pKa units' % self.parameters.max_free_energy_diff) - info(' Minimum swap pKa shift: %4.2f pKa units' % self.parameters.min_swap_pka_shift) + info(' Maximum pKa difference: %4.2f pKa units' \ + % self.parameters.max_intrinsic_pKa_diff) + info(' Minimum interaction energy: %4.2f pKa units' \ + % self.parameters.min_interaction_energy) + info(' Maximum free energy diff.: %4.2f pKa units' \ + % self.parameters.max_free_energy_diff) + info(' Minimum swap pKa shift: %4.2f pKa units' \ + % self.parameters.min_swap_pka_shift) info(' pH: %6s ' % str(self.parameters.pH)) info(' Reference: %s' % self.parameters.reference) info(' Min pKa: %4.2f' % self.parameters.min_pka) info(' Max pKa: %4.2f' % self.parameters.max_pka) info('') - # find coupled residues titratable_groups = conformation.get_titratable_groups() - if not conformation.non_covalently_coupled_groups: - for g1 in titratable_groups: - for g2 in titratable_groups: - if g1==g2: + for group1 in titratable_groups: + for group2 in titratable_groups: + if group1 == group2: break - - if not g1 in g2.non_covalently_coupled_groups and self.do_prot_stat: - data = self.is_coupled_protonation_state_probability(g1, g2,conformation.calculate_folding_energy) - if data['coupling_factor'] >0.0: - g1.couple_non_covalently(g2) - + if not group1 in group2.non_covalently_coupled_groups \ + and self.do_prot_stat: + data = self.\ + is_coupled_protonation_state_probability(group1, + group2, + conformation.\ + calculate_folding_energy) + if data['coupling_factor'] > 0.0: + group1.couple_non_covalently(group2) if verbose: self.print_out_swaps(conformation) - return + def print_out_swaps(self, conformation): + """Print out something having to do with coupling interactions. - def print_out_swaps(self, conformation, verbose=True): - map = propka.output.make_interaction_map('Non-covalent coupling map for %s'%conformation, - conformation.get_non_covalently_coupled_groups(), - lambda g1,g2: g1 in g2.non_covalently_coupled_groups) - info(map) - - for system in conformation.get_coupled_systems(conformation.get_non_covalently_coupled_groups(),propka.group.Group.get_non_covalently_coupled_groups): + Args: + conformation: conformation to print + """ + map_ = make_interaction_map('Non-covalent coupling map for %s' % conformation, + conformation.get_non_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.non_covalently_coupled_groups) + info(map_) + for system in conformation.get_coupled_systems(conformation.\ + get_non_covalently_coupled_groups(), \ + Group.get_non_covalently_coupled_groups): self.print_system(conformation, list(system)) - return def print_system(self, conformation, system): + """Print out something about the system. + Args: + conformation: conformation to print + system: system to print + """ info('System containing %d groups:' % len(system)) - - # make list of interactions withi this system - interactions = list(itertools.combinations(system,2)) - + # make list of interactions within this system + interactions = list(itertools.combinations(system, 2)) # print out coupling info for each interaction coup_info = '' for interaction in interactions: - data = self.is_coupled_protonation_state_probability(interaction[0], interaction[1],conformation.calculate_folding_energy, return_on_fail=False) - coup_info += self.make_data_to_string(data,interaction[0],interaction[1])+'\n\n' + data = self.is_coupled_protonation_state_probability(interaction[0], \ + interaction[1], conformation.calculate_folding_energy, \ + return_on_fail=False) + coup_info += self.make_data_to_string(data, interaction[0], \ + interaction[1]) + '\n\n' info(coup_info) - # make list of possible combinations of swap to try out combinations = propka.lib.generate_combinations(interactions) - # Make possible swap combinations swap_info = '' - swap_info += self.print_determinants_section(system,'Original') - + swap_info += self.print_determinants_section(system, 'Original') for combination in combinations: # Tell the user what is swap in this combination swap_info += 'Swapping the following interactions:\n' for interaction in combination: - swap_info += ' %s %s\n'%(interaction[0].label, interaction[1].label) - + swap_info += ' %s %s\n' % (interaction[0].label, + interaction[1].label) # swap... for interaction in combination: - self.swap_interactions([interaction[0]],[interaction[1]]) - - swap_info += self.print_determinants_section(system,'Swapped') - # ...and swap back - #for interaction in combination: - # self.swap_interactions([interaction[0]], [interaction[1]]) - + self.swap_interactions([interaction[0]], [interaction[1]]) + swap_info += self.print_determinants_section(system, 'Swapped') info(swap_info) - return - # - # Interaction and swapping methods - # + @staticmethod + def get_interaction(group1, group2, include_side_chain_hbs=True): + """Get interaction energy between two groups. - def get_interaction(self, group1, group2, include_side_chain_hbs = True): + Args: + group1: first group for interaction + group2: second group for interaction + include_side_chain_hbs: include side-chain hydrogen bonds in energy + Returns: + interaction energy (float) + """ determinants = group1.determinants['coulomb'] if include_side_chain_hbs: - determinants = group1.determinants['sidechain'] + group1.determinants['coulomb'] - + determinants = group1.determinants['sidechain'] \ + + group1.determinants['coulomb'] interaction_energy = 0.0 for det in determinants: if group2 == det.group: interaction_energy += det.value - return interaction_energy - - def print_determinants_section(self, system, tag): + """Print determinants of system. + + Args: + system: set of groups + tag: something to add to output + Returns: + string with summary + """ all_labels = [g.label for g in system] - s = ' '+'-'*113+'\n' + str_ = ' ' + '-' * 113 + '\n' for group in system: - s += self.tagged_format(' %-8s|' % tag, group.getDeterminantString(), all_labels) + str_ += self.tagged_format(' %-8s|' % tag, + group.getDeterminantString(), + all_labels) + return str_ + '\n' - return s+'\n' + def swap_interactions(self, groups1, groups2, include_side_chain_hbs=True): + """Swap interactions between two groups. - def swap_interactions(self, groups1, groups2, include_side_chain_hbs = True): - - for i in range(len(groups1)): - group1 = groups1[i] + Args: + group1: first group to swap + group2: second group to swap + """ + for i, group1 in enumerate(groups1): group2 = groups2[i] - # swap the interactions! - self.transfer_determinant(group1.determinants['coulomb'], group2.determinants['coulomb'], group1.label, group2.label) + self.transfer_determinant(group1.determinants['coulomb'], + group2.determinants['coulomb'], + group1.label, group2.label) if include_side_chain_hbs: - self.transfer_determinant(group1.determinants['sidechain'], group2.determinants['sidechain'], group1.label, group2.label) - - # re-calculate pKa values + self.transfer_determinant(group1.determinants['sidechain'], + group2.determinants['sidechain'], + group1.label, group2.label) + # re-calculate pKa values group1.calculate_total_pka() group2.calculate_total_pka() - return + @staticmethod + def transfer_determinant(determinants1, determinants2, + label1, label2): + """Transfer information between two sets of determinants. - - def transfer_determinant(self, determinants1, determinants2, label1, label2): + Args: + determinants1: determinant list + determinants2: determinant list + label1: label for list 1 + label2: label for list 2 + """ # find out what to transfer... from1to2 = [] from2to1 = [] for det in determinants1: if det.label == label2: from1to2.append(det) - for det in determinants2: if det.label == label1: from2to1.append(det) - # ...and transfer it! for det in from1to2: det.label = label1 determinants2.append(det) determinants1.remove(det) - for det in from2to1: det.label = label2 determinants1.append(det) determinants2.remove(det) - return + @staticmethod + def tagged_format(tag, str_, labels): + """Tag a string. - # - # Output methods - # - - def tagged_format(self, tag, s, labels): - s = "%s %s"%(tag,s) - s = s.replace('\n','\n%s '%tag) + Args: + tag: tag to add + str_: string to tag + labels: labels to replace + Returns: + tagged string + """ + str_ = "%s %s" % (tag, str_) + str_ = str_.replace('\n', '\n%s ' % tag) for label in labels: - s = s.replace(label, '\033[31m%s\033[30m'%label) - return s+'\n' + str_ = str_.replace(label, '\033[31m%s\033[30m' % label) + return str_ + '\n' + @staticmethod + def make_data_to_string(data, group1, group2): + """Describe interaction between groups. - def make_data_to_string(self, data, group1, group2): - s = """ %s and %s coupled (prot.state): %5.2f + Args: + data: data about interactions + group1: first group + group2: second group + Returns: + formatted string with information. + """ + str_ = \ + """ %s and %s coupled (prot.state): %5.2f Energy levels: %6.2f, %6.2f (difference: %6.2f) at pH %6.2f Interaction energy: %6.2f Intrinsic pka's: %6.2f, %6.2f (difference: %6.2f) - Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)"""%(group1.label, - group2.label, - data['coupling_factor'], - data['default_energy'], data['swapped_energy'], - data['default_energy'] - data['swapped_energy'], - data['pH'], - data['interaction_energy'], - group1.intrinsic_pKa, - group2.intrinsic_pKa, - group1.intrinsic_pKa-group2.intrinsic_pKa, - data['swapped_pka1'], - data['swapped_pka2'], - data['pka_shift1'], - data['pka_shift2']) + Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)""" % \ + (group1.label, group2.label, data['coupling_factor'], + data['default_energy'], data['swapped_energy'], + data['default_energy'] - data['swapped_energy'], data['pH'], + data['interaction_energy'], group1.intrinsic_pKa, group2.intrinsic_pKa, + group1.intrinsic_pKa-group2.intrinsic_pKa, data['swapped_pka1'], + data['swapped_pka2'], data['pka_shift1'], data['pka_shift2']) - return s + return str_ -nccg = non_covalently_couple_groups() +NCCG = NonCovalentlyCoupledGroups() From 25648327cecdd3536f8c246aabf02c89ee118504 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 08:19:32 -0700 Subject: [PATCH 11/65] De-lint determinant.py. --- propka/determinant.py | 34 +++++++++++++++++++++------------- propka/determinants.py | 4 ++++ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/propka/determinant.py b/propka/determinant.py index 5085ca1..2b29d08 100644 --- a/propka/determinant.py +++ b/propka/determinant.py @@ -1,29 +1,37 @@ +"""Holds the Determinant class + +TODO - it is confusing to have both `determinant.py` and `determinants.py`. +Should these be merged? +""" -from __future__ import division -from __future__ import print_function class Determinant: - """ - Determinant class - set up for later structurization + """Determinant class. + + Appears to be a container for storing information and values about + groups that interact to influence titration states. + + TODO - figure out what this class does. """ def __init__(self, group, value): - """ - Contructer of determinant object - simple, but helps in creating structure! + """Initialize the object. + + Args: + group: group associated with Determinant object + value: value to assign to group """ self.group = group self.label = group.label self.value = value - return - def add(self, value): - """ - adding a value to determinant + """Increment determinant value. + + Args: + value: value to add to determinant """ self.value += value - return - def __str__(self): - return '%s: %8.2f'%(self.label,self.value) + return '%s: %8.2f' % (self.label, self.value) diff --git a/propka/determinants.py b/propka/determinants.py index 658905a..52da557 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -9,6 +9,10 @@ import propka.calculations from propka.determinant import Determinant +# TODO - it is confusing to have both `determinant.py` and `determinants.py`. +# Should these be merged? + + def setDeterminants(propka_groups, version=None, options=None): """ adding side-chain and coulomb determinants/perturbations to all residues - note, backbone determinants are set separately From 476ab1f276ac37ddadc1b029dd02e0d990c2b1e8 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 08:53:51 -0700 Subject: [PATCH 12/65] De-lint determinants.py. Public methods/members names were changed. These were checked against Google for potential impact on other packages. --- propka/calculations.py | 2 +- propka/conformation_container.py | 10 +- propka/determinants.py | 321 +++++++++++++++++-------------- propka/iterative.py | 2 +- 4 files changed, 185 insertions(+), 150 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index ea2a54b..5bf2449 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -636,7 +636,7 @@ def check_coulomb_pair(parameters, group1, group2, dist): num_volume = group1.Nmass + group2.Nmass do_coulomb = True # check if both groups are titratable (ions are taken care of in - # determinants::setIonDeterminants) + # determinants::set_ion_determinants) if not (group1.titratable and group2.titratable): do_coulomb = False # check if the distance is not too big diff --git a/propka/conformation_container.py b/propka/conformation_container.py index 7b2f370..107a0a8 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -4,8 +4,8 @@ import propka.ligand from propka.output import make_interaction_map from propka.determinant import Determinant from propka.coupled_groups import NCCG -from propka.determinants import setBackBoneDeterminants, setIonDeterminants -from propka.determinants import setDeterminants +from propka.determinants import set_backbone_determinants, set_ion_determinants +from propka.determinants import set_determinants from propka.group import Group, is_group from propka.lib import info @@ -190,15 +190,15 @@ class ConformationContainer: for group in self.get_titratable_groups() + self.get_ions(): version.calculate_desolvation(group) # calculate backbone interactions - setBackBoneDeterminants(self.get_titratable_groups(), + set_backbone_determinants(self.get_titratable_groups(), self.get_backbone_groups(), version) # setting ion determinants - setIonDeterminants(self, version) + set_ion_determinants(self, version) # calculating the back-bone reorganization/desolvation term version.calculatebackbone_reorganization(self) # setting remaining non-iterative and iterative side-chain & Coulomb # interaction determinants - setDeterminants(self.get_sidechain_groups(), version=version, + set_determinants(self.get_sidechain_groups(), version=version, options=options) # calculating the total pKa values for group in self.groups: diff --git a/propka/determinants.py b/propka/determinants.py index 52da557..7b75d3f 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -1,23 +1,33 @@ +"""Functions to manipulate Determinant objects. -from __future__ import division -from __future__ import print_function - -import math, time - -import propka.iterative, propka.lib, propka.vector_algebra -import propka.calculations +TODO - it is confusing to have both `determinant.py` and `determinants.py`. +Should these be merged? +""" +import math +import propka.iterative +import propka.lib +import propka.vector_algebra +from propka.calculations import squared_distance, get_smallest_distance +from propka.calculations import angle_distance_factors, hydrogen_bond_energy from propka.determinant import Determinant -# TODO - it is confusing to have both `determinant.py` and `determinants.py`. -# Should these be merged? +# Cutoff for angle factor +# TODO - this constant appears elsewhere in the package. +# It should be moved to a configuration file. +FANGLE_MIN = 0.001 -def setDeterminants(propka_groups, version=None, options=None): +def set_determinants(propka_groups, version=None, options=None): + """Add side-chain and coulomb determinants/perturbations to all residues. + + NOTE - backbone determinants are set separately + + Args: + propka_groups: groups to adjust + version: version object + options: options object """ - adding side-chain and coulomb determinants/perturbations to all residues - note, backbone determinants are set separately - """ - iterative_interactions = [] # --- NonIterative section ---# for group1 in propka_groups: @@ -27,172 +37,198 @@ def setDeterminants(propka_groups, version=None, options=None): # do not calculate interactions for coupled groups if group2 in group1.covalently_coupled_groups: break - distance = propka.calculations.distance(group1, group2) - if distance < version.parameters.coulomb_cutoff2: - interaction_type = version.parameters.interaction_matrix.get_value(group1.type,group2.type) + interaction_type = version.parameters.interaction_matrix.get_value(group1.type, + group2.type) if interaction_type == 'I': - propka.iterative.addtoDeterminantList(group1, group2, distance, iterative_interactions, version=version) + propka.iterative.addtoDeterminantList(group1, group2, + distance, + iterative_interactions, + version=version) elif interaction_type == 'N': - addDeterminants(group1, group2, distance, version) - - + add_determinants(group1, group2, distance, version) # --- Iterative section ---# - propka.iterative.addDeterminants(iterative_interactions, version, options=options) + propka.iterative.add_determinants(iterative_interactions, version, + options=options) -def addDeterminants(group1, group2, distance, version): +def add_determinants(group1, group2, distance, version): + """Add determinants and perturbations for distance(R1, R2) < coulomb_cutoff. + + Args: + group1: first group to add + group2: second group to add + distance: distance between groups + version: version object """ - adding determinants/perturbations, distance(R1, R2) < coulomb_cutoff always - """ - # side-chain determinant - addSidechainDeterminants(group1, group2, version) - + add_sidechain_determinants(group1, group2, version) # Coulomb determinant - addCoulombDeterminants(group1, group2, distance, version) + add_coulomb_determinants(group1, group2, distance, version) - return -def addSidechainDeterminants(group1, group2, version=None): +def add_sidechain_determinants(group1, group2, version=None): + """Add side-chain determinants and perturbations. + + NOTE - res_num1 > res_num2 + + Args: + group1: first group to add + group2: second group to add + version: version object """ - adding side-chain determinants/perturbations - Note, res_num1 > res_num2 - """ - hbond_interaction = version.hydrogen_bond_interaction(group1, group2) - if hbond_interaction: - if group1.charge == group2.charge: # acid pair or base pair if group1.model_pka < group2.model_pka: - newDeterminant1 = Determinant(group2, -hbond_interaction) - newDeterminant2 = Determinant(group1, hbond_interaction) + new_determinant1 = Determinant(group2, -hbond_interaction) + new_determinant2 = Determinant(group1, hbond_interaction) else: - newDeterminant1 = Determinant(group2, hbond_interaction) - newDeterminant2 = Determinant(group1, -hbond_interaction) + new_determinant1 = Determinant(group2, hbond_interaction) + new_determinant2 = Determinant(group1, -hbond_interaction) else: - newDeterminant1 = Determinant(group2, hbond_interaction*group1.charge) - newDeterminant2 = Determinant(group1, hbond_interaction*group2.charge) + new_determinant1 = Determinant(group2, hbond_interaction*group1.charge) + new_determinant2 = Determinant(group1, hbond_interaction*group2.charge) + group1.determinants['sidechain'].append(new_determinant1) + group2.determinants['sidechain'].append(new_determinant2) - group1.determinants['sidechain'].append(newDeterminant1) - group2.determinants['sidechain'].append(newDeterminant2) - return +def add_coulomb_determinants(group1, group2, distance, version): + """Add non-iterative Coulomb determinants and perturbations. -def addCoulombDeterminants(group1, group2, distance, version): + Args: + group1: first group to add + group2: second group to add + distance: distance between groups + version: version object """ - adding NonIterative Coulomb determinants/perturbations - """ - - coulomb_interaction = version.electrostatic_interaction(group1, group2, distance) - + coulomb_interaction = version.electrostatic_interaction(group1, group2, + distance) if coulomb_interaction: - Q1 = group1.charge - Q2 = group2.charge - + q1 = group1.charge + q2 = group2.charge # assigning the Coulombic interaction - if Q1 < 0.0 and Q2 < 0.0: - """ both are acids """ - addCoulombAcidPair(group1, group2, coulomb_interaction) - elif Q1 > 0.0 and Q2 > 0.0: - """ both are bases """ - addCoulombBasePair(group1, group2, coulomb_interaction) + if q1 < 0.0 and q2 < 0.0: + # both are acids + add_coulomb_acid_pair(group1, group2, coulomb_interaction) + elif q1 > 0.0 and q2 > 0.0: + # both are bases + add_coulomb_base_pair(group1, group2, coulomb_interaction) else: - """ one of each """ - addCoulombIonPair(group1, group2, coulomb_interaction) - - return + # one of each + add_coulomb_ion_pair(group1, group2, coulomb_interaction) -def addCoulombAcidPair(object1, object2, value): +def add_coulomb_acid_pair(object1, object2, value): + """Add the Coulomb interaction (an acid pair). + + The higher pKa is raised. + + Args: + object1: first part of pair + object2: second part of pair + value: determinant value """ - Adding the Coulomb interaction (an acid pair): - the higher pKa is raised - """ - if object1.model_pka > object2.model_pka: - newDeterminant = Determinant(object2, value) - object1.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object2, value) + object1.determinants['coulomb'].append(new_determinant) else: - newDeterminant = Determinant(object1, value) - object2.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object1, value) + object2.determinants['coulomb'].append(new_determinant) -def addCoulombBasePair(object1, object2, value): - """ - Adding the Coulomb interaction (a base pair): - the lower pKa is lowered +def add_coulomb_base_pair(object1, object2, value): + """Add the Coulomb interaction (a base pair). + + The lower pKa is lowered. + + Args: + object1: first part of pair + object2: second part of pair + value: determinant value """ if object1.model_pka < object2.model_pka: - newDeterminant = Determinant(object2, -value) - object1.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object2, -value) + object1.determinants['coulomb'].append(new_determinant) else: - newDeterminant = Determinant(object1, -value) - object2.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object1, -value) + object2.determinants['coulomb'].append(new_determinant) -def addCoulombIonPair(object1, object2, value): +def add_coulomb_ion_pair(object1, object2, value): + """Add the Coulomb interaction (an acid-base pair). + + The pKa of the acid is lowered & the pKa of the base is raised. + + Args: + object1: first part of pair + object2: second part of pair + value: determinant value """ - Adding the Coulomb interaction (an acid-base pair): - the pKa of the acid is lowered & the pKa of the base is raised - """ - # residue1 - Q1 = object1.charge - newDeterminant = Determinant(object2, Q1*value) - object1.determinants['coulomb'].append(newDeterminant) - + q1 = object1.charge + new_determinant = Determinant(object2, q1*value) + object1.determinants['coulomb'].append(new_determinant) # residue2 - Q2 = object2.charge - newDeterminant = Determinant(object1, Q2*value) - object2.determinants['coulomb'].append(newDeterminant) + q2 = object2.charge + new_determinant = Determinant(object1, q2*value) + object2.determinants['coulomb'].append(new_determinant) +def set_ion_determinants(conformation_container, version): + """Add ion determinants and perturbations. - -def setIonDeterminants(conformation_container, version): - """ - adding ion determinants/perturbations + Args: + conformation_container: conformation to set + version: version object """ for titratable_group in conformation_container.get_titratable_groups(): for ion_group in conformation_container.get_ions(): - squared_distance = propka.calculations.squared_distance(titratable_group, ion_group) - if squared_distance < version.parameters.coulomb_cutoff2_squared: - weight = version.calculate_pair_weight(titratable_group.Nmass, ion_group.Nmass) - # the pKa of both acids and bases are shifted up by negative ions (and vice versa) - value = (-ion_group.charge) * version.calculate_coulomb_energy(math.sqrt(squared_distance), weight) - newDeterminant = Determinant(ion_group, value) - titratable_group.determinants['coulomb'].append(newDeterminant) + dist_sq = squared_distance(titratable_group, ion_group) + if dist_sq < version.parameters.coulomb_cutoff2_squared: + weight = version.calculate_pair_weight(titratable_group.Nmass, + ion_group.Nmass) + # the pKa of both acids and bases are shifted up by negative + # ions (and vice versa) + value = (-ion_group.charge) \ + * version.calculate_coulomb_energy(math.sqrt(dist_sq), + weight) + new_det = Determinant(ion_group, value) + titratable_group.determinants['coulomb'].append(new_det) - return -def setBackBoneDeterminants(titratable_groups, backbone_groups, version): +def set_backbone_determinants(titratable_groups, backbone_groups, version): + """Set determinants between titrable and backbone groups. + Args: + titratable_groups: list of titratable groups + backbone_groups: list of backbone groups + version: version object + """ for titratable_group in titratable_groups: - titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids + titratable_group_interaction_atoms \ + = titratable_group.interaction_atoms_for_acids if not titratable_group_interaction_atoms: continue - # find out which backbone groups this titratable is interacting with for backbone_group in backbone_groups: # find the interacting atoms - backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group) + backbone_interaction_atoms \ + = backbone_group.get_interaction_atoms(titratable_group) if not backbone_interaction_atoms: continue - # find the smallest distance - [backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms, - titratable_group_interaction_atoms) + [backbone_atom, distance, titratable_atom] \ + = get_smallest_distance(backbone_interaction_atoms, \ + titratable_group_interaction_atoms) # get the parameters - parameters = version.get_backbone_hydrogen_bond_parameters(backbone_atom, titratable_atom) + parameters = version.get_backbone_hydrogen_bond_parameters(backbone_atom, + titratable_atom) if not parameters: continue - [dpKa_max, [cutoff1, cutoff2]] = parameters - - + [dpka_max, [cutoff1, cutoff2]] = parameters if distance < cutoff2: # calculate angle factor f_angle = 1.0 @@ -206,19 +242,20 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version): # || # C if backbone_group.type == 'BBC': - if titratable_group.type in version.parameters.angular_dependent_sidechain_interactions: + if titratable_group.type \ + in version.parameters.angular_dependent_sidechain_interactions: if titratable_atom.element == 'H': - heavy_atom = titratable_atom.bonded_atoms[0] + heavy_atom = titratable_atom.bonded_atoms[0] hydrogen_atom = titratable_atom - [d1, f_angle, d2] = propka.calculations.angle_distance_factors(atom1=heavy_atom, - atom2=hydrogen_atom, - atom3=backbone_atom) + [_, f_angle, _] = angle_distance_factors(atom1=heavy_atom, + atom2=hydrogen_atom, + atom3=backbone_atom) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), + # or the heavy atom is closer to the titratable + # atom than the hydrogen. In either case we set the + # angle factor to 0 f_angle = 0.0 - # for BBN groups, the hydrogen is on the backbone group # # Titra. @@ -229,23 +266,21 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version): # / \ if backbone_group.type == 'BBN': if backbone_atom.element == 'H': - backbone_N = backbone_atom.bonded_atoms[0] - backbone_H = backbone_atom - [d1, f_angle, d2] = propka.calculations.angle_distance_factors(atom1=titratable_atom, - atom2=backbone_H, - atom3=backbone_N) + backbone_n = backbone_atom.bonded_atoms[0] + backbone_h = backbone_atom + [_, f_angle, _] = angle_distance_factors(atom1=titratable_atom, + atom2=backbone_h, + atom3=backbone_n) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), or the + # heavy atom is closer to the titratable atom than the + # hydrogen. In either case we set the angle factor to 0 f_angle = 0.0 - - - if f_angle > 0.001: - value = titratable_group.charge * propka.calculations.hydrogen_bond_energy(distance, dpKa_max, [cutoff1,cutoff2], f_angle) - - newDeterminant = Determinant(backbone_group, value) - titratable_group.determinants['backbone'].append(newDeterminant) - - - return + if f_angle > FANGLE_MIN: + value = titratable_group.charge * hydrogen_bond_energy(distance, + dpka_max, + [cutoff1, cutoff2], + f_angle) + new_determinant = Determinant(backbone_group, value) + titratable_group.determinants['backbone'].append(new_determinant) + \ No newline at end of file diff --git a/propka/iterative.py b/propka/iterative.py index 3614d93..132d42b 100644 --- a/propka/iterative.py +++ b/propka/iterative.py @@ -166,7 +166,7 @@ def addIterativeIonPair(object1, object2, interaction, version): object2.determinants['sidechain'].append(interaction) -def addDeterminants(iterative_interactions, version, options=None): +def add_determinants(iterative_interactions, version, options=None): """ The iterative pKa scheme. Later it is all added in 'calculateTotalPKA' """ From 6cbcac3be997c28f9a13974bcd4ba536df245cd8 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 11:12:47 -0700 Subject: [PATCH 13/65] Make de-linting easier. --- .pylintrc | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..fb68c9c --- /dev/null +++ b/.pylintrc @@ -0,0 +1,2 @@ +[MESSAGES CONTROL] +disable = no-else-return \ No newline at end of file From a534d97016ae42a3459c6f2db29a9c3e451c828a Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 11:13:06 -0700 Subject: [PATCH 14/65] De-lint groups.py I did my best (via Google) to make sure that renamed public members and methods had minimal impact on others' code. --- propka.cfg | 2 +- propka/calculations.py | 32 +- propka/coupled_groups.py | 20 +- propka/determinants.py | 4 +- propka/group.py | 1279 ++++++++++++++++++++------------------ propka/iterative.py | 4 +- propka/output.py | 4 +- propka/parameters.py | 2 +- propka/propka.cfg | 2 +- propka/version.py | 4 +- 10 files changed, 703 insertions(+), 650 deletions(-) diff --git a/propka.cfg b/propka.cfg index a12e38a..58b1aec 100644 --- a/propka.cfg +++ b/propka.cfg @@ -342,7 +342,7 @@ common_charge_centre 0 remove_penalised_group 1 # non-covalent coupling -max_intrinsic_pKa_diff 2.0 +max_intrinsic_pka_diff 2.0 min_interaction_energy 0.5 max_free_energy_diff 1.0 min_swap_pka_shift 1.0 diff --git a/propka/calculations.py b/propka/calculations.py index 5bf2449..c2f15b5 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -89,8 +89,8 @@ def setup_bonding_and_protonation(parameters, molecular_container): my_bond_maker.add_pi_electron_information(molecular_container) # Protonate atoms if molecular_container.options.protonate_all: - my_protonator = propka.protonate.Protonate(verbose=False) - my_protonator.protonate(molecular_container) + PROTONATOR = propka.protonate.Protonate(verbose=False) + PROTONATOR.protonate(molecular_container) def setup_bonding(molecular_container): @@ -386,10 +386,10 @@ def radial_volume_desolvation(parameters, group): """ all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() volume = 0.0 - # TODO - Nathan really wants to rename the Nmass variable. + # TODO - Nathan really wants to rename the num_volume variable. # He had to re-read the original paper to figure out what it was. # A better name would be num_volume. - group.Nmass = 0 + group.num_volume = 0 min_dist_4th = MIN_DISTANCE_4TH for atom in all_atoms: # ignore atoms in the same residue @@ -410,11 +410,11 @@ def radial_volume_desolvation(parameters, group): volume += dv_inc # buried if sq_dist < parameters.buried_cutoff_squared: - group.Nmass += 1 - group.buried = calculate_weight(parameters, group.Nmass) + group.num_volume += 1 + group.buried = calculate_weight(parameters, group.num_volume) scale_factor = calculate_scale_factor(parameters, group.buried) volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) - group.Emass = group.charge * parameters.desolvationPrefactor \ + group.energy_volume = group.charge * parameters.desolvationPrefactor \ * volume_after_allowance * scale_factor @@ -589,7 +589,7 @@ def hydrogen_bond_interaction(group1, group2, version): # is closer to the titratable atom than the hydrogen. In either # case, we set the angle factor to 0 f_angle = 0.0 - weight = version.calculate_pair_weight(group1.Nmass, group2.Nmass) + weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) exception, value = version.check_exceptions(group1, group2) if exception: # Do nothing, value should have been assigned. @@ -614,7 +614,7 @@ def electrostatic_interaction(group1, group2, dist, version): # check if we should do coulomb interaction at all if not version.check_coulomb_pair(group1, group2, dist): return None - weight = version.calculate_pair_weight(group1.Nmass, group2.Nmass) + weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) value = version.calculate_coulomb_energy(dist, weight) return value @@ -633,7 +633,7 @@ def check_coulomb_pair(parameters, group1, group2, dist): Returns: Boolean """ - num_volume = group1.Nmass + group2.Nmass + num_volume = group1.num_volume + group2.num_volume do_coulomb = True # check if both groups are titratable (ions are taken care of in # determinants::set_ion_determinants) @@ -695,7 +695,7 @@ def backbone_reorganization(parameters, conformation): value = 1.0 - (dist-UNK_BACKBONE_DISTANCE2) \ / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2) dpka += UNK_PKA_SCALING2*min(1.0, value) - titratable_group.Elocl = dpka*weight + titratable_group.energy_local = dpka*weight def check_exceptions(version, group1, group2): @@ -799,7 +799,7 @@ def check_coo_coo_exception(group1, group2, version): closest_atom2) f_angle = 1.00 value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) - weight = calculate_pair_weight(version.parameters, group1.Nmass, group2.Nmass) + weight = calculate_pair_weight(version.parameters, group1.num_volume, group2.num_volume) value = value * (1.0 + weight) return exception, value @@ -816,7 +816,7 @@ def check_coo_his_exception(group1, group2, version): 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if check_buried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True return exception, version.parameters.COO_HIS_exception @@ -833,7 +833,7 @@ def check_oco_his_exception(group1, group2, version): 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if check_buried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True return exception, version.parameters.OCO_HIS_exception @@ -850,7 +850,7 @@ def check_cys_his_exception(group1, group2, version): 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if check_buried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True return exception, version.parameters.CYS_HIS_exception @@ -867,7 +867,7 @@ def check_cys_cys_exception(group1, group2, version): 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if check_buried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True return exception, version.parameters.CYS_CYS_exception diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index 49777f8..54197ab 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -33,7 +33,7 @@ class NonCovalentlyCoupledGroups: return {'coupling_factor': -1.0} # calculate intrinsic pKa's, if not already done for group in [group1, group2]: - if not hasattr(group, 'intrinsic_pKa'): + if group.intrinsic_pka is None: group.calculate_intrinsic_pka() use_ph = self.parameters.pH if self.parameters.pH == 'variable': @@ -70,12 +70,12 @@ class NonCovalentlyCoupledGroups: and return_on_fail: return {'coupling_factor': -1.0} # check intrinsic pka diff - if abs(group1.intrinsic_pKa - group2.intrinsic_pKa) \ - > self.parameters.max_intrinsic_pKa_diff and return_on_fail: + if abs(group1.intrinsic_pka - group2.intrinsic_pka) \ + > self.parameters.max_intrinsic_pka_diff and return_on_fail: return {'coupling_factor': -1.0} # if everything is OK, calculate the coupling factor and return all info factor = self.get_free_energy_diff_factor(default_energy, swapped_energy) \ - * self.get_pka_diff_factor(group1.intrinsic_pKa, group2.intrinsic_pKa) \ + * self.get_pka_diff_factor(group1.intrinsic_pka, group2.intrinsic_pka) \ * self.get_interaction_factor(interaction_energy) return {'coupling_factor': factor, 'default_energy': default_energy, 'swapped_energy': swapped_energy, @@ -95,8 +95,8 @@ class NonCovalentlyCoupledGroups: """ intrinsic_pka_diff = abs(pka1-pka2) res = 0.0 - if intrinsic_pka_diff <= self.parameters.max_intrinsic_pKa_diff: - res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pKa_diff)**2 + if intrinsic_pka_diff <= self.parameters.max_intrinsic_pka_diff: + res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pka_diff)**2 return res def get_free_energy_diff_factor(self, energy1, energy2): @@ -147,7 +147,7 @@ class NonCovalentlyCoupledGroups: info(' Detecting non-covalently coupled residues') info('-' * 103) info(' Maximum pKa difference: %4.2f pKa units' \ - % self.parameters.max_intrinsic_pKa_diff) + % self.parameters.max_intrinsic_pka_diff) info(' Minimum interaction energy: %4.2f pKa units' \ % self.parameters.min_interaction_energy) info(' Maximum free energy diff.: %4.2f pKa units' \ @@ -263,7 +263,7 @@ class NonCovalentlyCoupledGroups: str_ = ' ' + '-' * 113 + '\n' for group in system: str_ += self.tagged_format(' %-8s|' % tag, - group.getDeterminantString(), + group.get_determinant_string(), all_labels) return str_ + '\n' @@ -355,8 +355,8 @@ class NonCovalentlyCoupledGroups: (group1.label, group2.label, data['coupling_factor'], data['default_energy'], data['swapped_energy'], data['default_energy'] - data['swapped_energy'], data['pH'], - data['interaction_energy'], group1.intrinsic_pKa, group2.intrinsic_pKa, - group1.intrinsic_pKa-group2.intrinsic_pKa, data['swapped_pka1'], + data['interaction_energy'], group1.intrinsic_pka, group2.intrinsic_pka, + group1.intrinsic_pka-group2.intrinsic_pka, data['swapped_pka1'], data['swapped_pka2'], data['pka_shift1'], data['pka_shift2']) return str_ diff --git a/propka/determinants.py b/propka/determinants.py index 7b75d3f..101547a 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -188,8 +188,8 @@ def set_ion_determinants(conformation_container, version): for ion_group in conformation_container.get_ions(): dist_sq = squared_distance(titratable_group, ion_group) if dist_sq < version.parameters.coulomb_cutoff2_squared: - weight = version.calculate_pair_weight(titratable_group.Nmass, - ion_group.Nmass) + weight = version.calculate_pair_weight(titratable_group.num_volume, + ion_group.num_volume) # the pKa of both acids and bases are shifted up by negative # ions (and vice versa) value = (-ion_group.charge) \ diff --git a/propka/group.py b/propka/group.py index c524dac..49e2552 100644 --- a/propka/group.py +++ b/propka/group.py @@ -1,336 +1,332 @@ -# -# Class for storing groups important for propka calculations -# - -from __future__ import division -from __future__ import print_function - -import propka.ligand, propka.determinant, propka.ligand_pka_values, math, propka.protonate +"""Routines and classes for storing groups important to PROPKA calculations.""" +import math +import propka.ligand +import propka.protonate +from propka.ligand_pka_values import ligand_pka_values +from propka.determinant import Determinant from propka.lib import info, warning -my_protonator = propka.protonate.Protonate(verbose=False) - -expected_atoms_acid_interactions = { - 'COO':{'O':2}, - 'HIS':{'H':2, 'N':2}, - 'CYS':{'S':1}, - 'TYR':{'O':1}, - 'LYS':{'N':1}, - 'ARG':{'H':5, 'N':3}, - 'ROH':{'O':1}, - 'AMD':{'H':2, 'N':1}, - 'TRP':{'H':1, 'N':1}, - 'N+': {'N':1}, - 'C-': {'O':2}, - 'BBN':{'H':1, 'N':1,}, - 'BBC':{'O':1}, - 'NAR':{'H':1, 'N':1}, - 'NAM':{'H':1, 'N':1}, - 'F': {'F':1}, - 'Cl': {'Cl':1}, - 'OH': {'H':1, 'O':1}, - 'OP': {'O':1}, - 'O3': {'O':1}, - 'O2': {'O':1}, - 'SH': {'S':1}, - 'CG': {'H':5, 'N':3}, - 'C2N':{'H':4, 'N':2}, - 'OCO':{'O':2}, - 'N30':{'H':4, 'N':1}, - 'N31':{'H':3, 'N':1}, - 'N32':{'H':2, 'N':1}, - 'N33':{'H':1, 'N':1}, - 'NP1':{'H':2, 'N':1}, - 'N1' :{'N':1} -} - -expected_atoms_base_interactions = { - 'COO':{'O':2}, - 'HIS':{'N':2}, - 'CYS':{'S':1}, - 'TYR':{'O':1}, - 'LYS':{'N':1}, - 'ARG':{'N':3}, - 'ROH':{'O':1}, - 'AMD':{'O':1}, - 'TRP':{'N':1}, - 'N+': {'N':1}, - 'C-': {'O':2}, - 'BBN':{'H':1, 'N':1,}, - 'BBC':{'O':1}, - 'NAR':{'H':1, 'N':1}, - 'NAM':{'H':1, 'N':1}, - 'F': {'F':1}, - 'Cl': {'Cl':1}, - 'OH': {'H':1, 'O':1}, - 'OP': {'O':1}, - 'O3': {'O':1}, - 'O2': {'O':1}, - 'SH': {'S':1}, - 'CG': {'N':3}, - 'C2N':{'N':2}, - 'OCO':{'O':2}, - 'N30':{'N':1}, - 'N31':{'N':1}, - 'N32':{'N':1}, - 'N33':{'N':1}, - 'NP1':{'N':1}, - 'N1' :{'N':1} -} +# Constants that start with "UNK_" are a mystery to me +UNK_PKA_SCALING = -1.36 +PROTONATOR = propka.protonate.Protonate(verbose=False) +EXPECTED_ATOMS_ACID_INTERACTIONS = {'COO': {'O': 2}, 'HIS': {'H': 2, 'N': 2}, + 'CYS': {'S': 1}, 'TYR': {'O': 1}, + 'LYS': {'N': 1}, 'ARG': {'H': 5, 'N': 3}, + 'ROH': {'O': 1}, 'AMD': {'H': 2, 'N': 1}, + 'TRP': {'H': 1, 'N': 1}, 'N+': {'N': 1}, + 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1,}, + 'BBC': {'O': 1}, 'NAR': {'H': 1, 'N': 1}, + 'NAM': {'H': 1, 'N': 1}, 'F': {'F': 1}, + 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, + 'OP': {'O': 1}, 'O3': {'O': 1}, + 'O2': {'O': 1}, 'SH': {'S': 1}, + 'CG': {'H': 5, 'N': 3}, + 'C2N': {'H': 4, 'N': 2}, 'OCO': {'O': 2}, + 'N30': {'H': 4, 'N': 1}, + 'N31': {'H': 3, 'N': 1}, + 'N32': {'H': 2, 'N': 1}, + 'N33': {'H': 1, 'N': 1}, + 'NP1': {'H': 2, 'N': 1}, 'N1': {'N': 1}} +EXPECTED_ATOMS_BASE_INTERACTIONS = {'COO': {'O': 2}, 'HIS': {'N': 2}, + 'CYS': {'S': 1}, 'TYR': {'O': 1}, + 'LYS': {'N': 1}, 'ARG': {'N': 3}, + 'ROH': {'O': 1}, 'AMD': {'O': 1}, + 'TRP': {'N': 1}, 'N+': {'N': 1}, + 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1}, + 'BBC': {'O': 1}, 'NAR': {'H': 1, 'N': 1}, + 'NAM': {'H': 1, 'N': 1}, 'F': {'F': 1}, + 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, + 'OP': {'O': 1}, 'O3': {'O': 1}, + 'O2': {'O': 1}, 'SH': {'S': 1}, + 'CG': {'N': 3}, 'C2N': {'N': 2}, + 'OCO': {'O': 2}, 'N30': {'N': 1}, + 'N31': {'N': 1}, 'N32': {'N': 1}, + 'N33': {'N': 1}, 'NP1': {'N': 1}, + 'N1': {'N': 1}} class Group: + """Class for storing groups important to pKa calculations.""" + def __init__(self, atom): - #info('Made new %s group from %s'%(type,atom)) + """Initialize with an atom. + + Args: + atom: atom object + """ self.atom = atom self.type = '' atom.group = self - # set up data structures - self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} + self.determinants = {'sidechain': [], 'backbone': [], 'coulomb': []} self.pka_value = 0.0 self.model_pka = 0.0 - - self.Emass = 0.0 - self.Nmass = 0.0 - self.Elocl = 0.0 - self.Nlocl = 0.0 + # Energy associated with volume interactions + self.energy_volume = 0.0 + # Number of atoms associated with volume interactions + self.num_volume = 0.0 + # Energy associated with local interactions + self.energy_local = 0.0 + # Number of atoms associated with local interactions + self.num_local = 0.0 self.buried = 0.0 self.x = 0.0 self.y = 0.0 self.z = 0.0 self.charge = 0 + self.parameters = None + self.exclude_cys_from_results = None self.interaction_atoms_for_acids = [] self.interaction_atoms_for_bases = [] self.model_pka_set = False - + self.intrinsic_pka = None + self.titratable = None # information on covalent and non-covalent coupling self.non_covalently_coupled_groups = [] self.covalently_coupled_groups = [] self.coupled_titrating_group = None self.common_charge_centre = False - - self.residue_type = self.atom.res_name if self.atom.terminal: self.residue_type = self.atom.terminal - - if self.atom.type=='atom': - self.label = '%-3s%4d%2s'%(self.residue_type, atom.res_num, atom.chain_id) - elif self.atom.res_name in ['DA ','DC ','DG ','DT ']: - self.label = '%1s%1s%1s%4d%2s'%(self.residue_type[1], - atom.element, - atom.name.replace('\'','')[-1], - atom.res_num, - atom.chain_id) - -# self.label = '%1s%1s%1s%4d%2s'%(self.residue_type[1], atom.element,atom.name[-1], atom.res_num, atom.chain_id) + if self.atom.type == 'atom': + self.label = '%-3s%4d%2s' % (self.residue_type, atom.res_num, + atom.chain_id) + elif self.atom.res_name in ['DA ', 'DC ', 'DG ', 'DT ']: + self.label = '%1s%1s%1s%4d%2s' % (self.residue_type[1], + atom.element, + atom.name.replace('\'', '')[-1], + atom.res_num, + atom.chain_id) else: - self.label = '%-3s%4s%2s'%(self.residue_type, atom.name, atom.chain_id) - - + self.label = '%-3s%4s%2s' % (self.residue_type, atom.name, + atom.chain_id) # container for squared distances self.squared_distances = {} - return - - - # - # Coupling-related methods - # def couple_covalently(self, other): - """ Couple this group with another group """ + """Couple this group with another group. + + Args: + other: other group for coupling + """ # do the coupling if not other in self.covalently_coupled_groups: self.covalently_coupled_groups.append(other) - if not self in other.covalently_coupled_groups: other.covalently_coupled_groups.append(self) - return - def couple_non_covalently(self, other): - """ Couple this group with another group """ + """Non-covalenthly couple this group with another group. + + Args: + other: other group for coupling + """ # do the coupling if not other in self.non_covalently_coupled_groups: self.non_covalently_coupled_groups.append(other) - if not self in other.non_covalently_coupled_groups: other.non_covalently_coupled_groups.append(self) - return + def get_covalently_coupled_groups(self): + """Get covalently coupled groups. - def get_covalently_coupled_groups(self): return self.covalently_coupled_groups - def get_non_covalently_coupled_groups(self): return self.non_covalently_coupled_groups + Returns: + list of covalently coupled groups. + """ + return self.covalently_coupled_groups + def get_non_covalently_coupled_groups(self): + """Get non-covalently coupled groups. + + Returns: + list of covalently coupled groups. + """ + return self.non_covalently_coupled_groups def share_determinants(self, others): + """Share determinants between this group and others. + Args: + others: list of other groups + """ # for each determinant type for other in others: if other == self: + the_other = other continue - - for type in ['sidechain','backbone','coulomb']: - for g in other.determinants[type]: self.share_determinant(g,type) - + for type_ in ['sidechain', 'backbone', 'coulomb']: + for det in other.determinants[type_]: + self.share_determinant(det, type_) # recalculate pka values self.calculate_total_pka() - other.calculate_total_pka() + the_other.calculate_total_pka() - return + def share_determinant(self, new_determinant, type_): + """Add determinant to this group's list of determinants. - - def share_determinant(self, new_determinant, type): + Args: + new_determinant: determinant to add + type_: type of determinant + """ added = False # first check if we already have a determinant with this label - for own_determinant in self.determinants[type]: + for own_determinant in self.determinants[type_]: if own_determinant.group == new_determinant.group: # if so, find the average value - avr = (own_determinant.value + new_determinant.value)/2.0 + avr = 0.5*(own_determinant.value + new_determinant.value) own_determinant.value = avr new_determinant.value = avr added = True - # otherwise we just add the determinant to our list if not added: - self.determinants[type].append(propka.determinant.Determinant(new_determinant.group, - new_determinant.value)) - - return + self.determinants[type_].append(Determinant(new_determinant.group, + new_determinant.value)) def make_covalently_coupled_line(self): + """Create line for covalent coupling. + Returns: + string + """ # first check if there are any coupled groups at all if len(self.covalently_coupled_groups) == 0: return '' - - line = 'CCOUPL%5d'%self.atom.numb - + line = 'CCOUPL%5d' % self.atom.numb # extract and sort numbers of coupled groups coupled = [] - for g in self.covalently_coupled_groups: - coupled.append(g.atom.numb) + for group in self.covalently_coupled_groups: + coupled.append(group.atom.numb) coupled.sort() - # write 'em out - for b in coupled: - line += '%5d'%b + for num in coupled: + line += '%5d' % num line += '\n' - return line def make_non_covalently_coupled_line(self): + """Create line for non-covalent coupling. + + Returns: + string + """ # first check if there are any coupled groups at all if len(self.non_covalently_coupled_groups) == 0: return '' - - line = 'NCOUPL%5d'%self.atom.numb - + line = 'NCOUPL%5d' % self.atom.numb # extract and sort numbers of coupled groups coupled = [] - for g in self.non_covalently_coupled_groups: - coupled.append(g.atom.numb) + for group in self.non_covalently_coupled_groups: + coupled.append(group.atom.numb) coupled.sort() - # write 'em out - for b in coupled: - line += '%5d'%b + for num in coupled: + line += '%5d' % num line += '\n' - - return line - # - # Bookkeeping methods - # def __eq__(self, other): - """ - Check if two groups should be considered identical - """ + """Needed for creating sets of groups.""" if self.atom.type == 'atom': # In case of protein atoms we trust the labels - return self.label==other.label + return self.label == other.label else: # For heterogene atoms we also need to check the residue number - return self.label==other.label and self.atom.res_num == other.atom.res_num + return (self.label == other.label) \ + and (self.atom.res_num == other.atom.res_num) def __hash__(self): - """ Needed together with __eq__ - otherwise we can't make sets of groups """ + """Needed for creating sets of groups.""" return id(self) def __iadd__(self, other): if self.type != other.type: - raise Exception('Cannot add groups of different types (%s and %s)'%(self.type,other.type)) - + errstr = 'Cannot add groups of different types (%s and %s)' \ + % (self.type, other.type) + raise Exception(errstr) # add all values self.pka_value += other.pka_value - self.Nmass += other.Nmass - self.Emass += other.Emass - self.Nlocl += other.Nlocl - self.Elocl += other.Elocl + self.num_volume += other.num_volume + self.energy_volume += other.energy_volume + self.num_local += other.num_local + self.energy_local += other.energy_local self.buried += other.buried # and add all determinants - for type in ['sidechain','backbone','coulomb']: - for determinant in other.determinants[type]: - self.add_determinant(determinant, type) + # TODO - list ['sidechain', 'backbone', 'coulomb'] should be constant + # This list appears all over the code and should be moved to a constant + # higher in the package + for type_ in ['sidechain', 'backbone', 'coulomb']: + for determinant in other.determinants[type_]: + self.add_determinant(determinant, type_) return self + def add_determinant(self, new_determinant, type_): + """Add to current and creates non-present determinants. - def add_determinant(self, new_determinant, type): - """ Adds to current and creates non-present determinants""" + Args: + new_determinant: new determinant to add + type_: determinant type + """ # first check if we already have a determinant with this label - for own_determinant in self.determinants[type]: + for own_determinant in self.determinants[type_]: if own_determinant.group == new_determinant.group: # if so, add the value own_determinant.value += new_determinant.value return - # otherwise we just add the determinant to our list - self.determinants[type].append(propka.determinant.Determinant(new_determinant.group, - new_determinant.value)) + self.determinants[type_].append(Determinant(new_determinant.group, + new_determinant.value)) - return + def set_determinant(self, new_determinant, type_): + """Overwrite current and create non-present determinants. - def set_determinant(self, new_determinant, type): - """ Overwrites current and creates non-present determinants""" + Args: + new_determinant: new determinant to add + type_: determinant type + """ # first check if we already have a determinant with this label - for own_determinant in self.determinants[type]: + for own_determinant in self.determinants[type_]: if own_determinant.group == new_determinant.group: # if so, overwrite the value own_determinant.value = new_determinant.value return - # otherwise we just add the determinant to our list - self.determinants[type].append(propka.determinant.Determinant(new_determinant.group, - new_determinant.value)) - - return + self.determinants[type_].append(Determinant(new_determinant.group, + new_determinant.value)) def remove_determinants(self, labels): - """ removes all determinants with label in labels """ - for type in ['sidechain','backbone','coulomb']: - matches = list(filter(lambda d: d.label in labels, [d for d in self.determinants[type]])) - for m in matches: self.determinants[type].remove(m) + """Remove all determinants with specified labels. - return + Args: + labels: list of labels to remove + """ + for type_ in ['sidechain', 'backbone', 'coulomb']: + matches = list(filter(lambda d: d.label in labels, \ + [d for d in self.determinants[type_]])) + for match in matches: + self.determinants[type_].remove(match) def __truediv__(self, value): value = float(value) # divide all values self.pka_value /= value - self.Nmass /= value - self.Emass /= value - self.Nlocl /= value - self.Elocl /= value + self.num_volume /= value + self.energy_volume /= value + self.num_local /= value + self.energy_local /= value self.buried /= value # and all determinants - for type in ['sidechain','backbone','coulomb']: - for determinant in self.determinants[type]: + for type_ in ['sidechain', 'backbone', 'coulomb']: + for determinant in self.determinants[type_]: determinant.value /= value return self def clone(self): + """Create a copy of this group. + + Returns: + Copy of this group. + """ res = Group(self.atom) res.type = self.type res.residue_type = self.residue_type @@ -344,17 +340,14 @@ class Group: return res def setup(self): + """Set up a group.""" # set the charges if self.type in self.parameters.charge.keys(): self.charge = self.parameters.charge[self.type] if self.residue_type in self.parameters.ions.keys(): self.charge = self.parameters.ions[self.residue_type] - - #info('ION setup',self,self.residue_type, self.charge) - # find the center and the interaction atoms self.setup_atoms() - # set the model pka value self.titratable = False if self.residue_type in self.parameters.model_pkas.keys(): @@ -364,783 +357,841 @@ class Group: key = '%s-%s'%(self.atom.res_name.strip(), self.atom.name.strip()) if key in self.parameters.custom_model_pkas.keys(): self.model_pka = self.parameters.custom_model_pkas[key] - self.model_pka_set = True - if self.model_pka_set and not self.atom.cysteine_bridge: self.titratable = True self.exclude_cys_from_results = False - return - def setup_atoms(self): - # This method is overwritten for some types of groups + """Set up atoms in group. + + This method is overwritten for some types of groups + """ # set the center at the position of the main atom self.set_center([self.atom]) # set the main atom as interaction atom self.set_interaction_atoms([self.atom], [self.atom]) - return - def set_interaction_atoms(self, interaction_atoms_for_acids, interaction_atoms_for_bases): - [a.set_group_type(self.type) for a in interaction_atoms_for_acids+interaction_atoms_for_bases] + def set_interaction_atoms(self, interaction_atoms_for_acids, + interaction_atoms_for_bases): + """Set interacting atoms and group types. + Args: + interaction_atoms_for_acids: list of atoms for acid interactions + interaction_atoms_for_base: list of atoms for base interactions + """ + for atom in interaction_atoms_for_acids + interaction_atoms_for_bases: + atom.set_group_type(self.type) self.interaction_atoms_for_acids = interaction_atoms_for_acids self.interaction_atoms_for_bases = interaction_atoms_for_bases - # check if all atoms have been identified ok = True - for [expect, found, t] in [[expected_atoms_acid_interactions, self.interaction_atoms_for_acids, 'acid'], - [expected_atoms_base_interactions, self.interaction_atoms_for_bases, 'base']]: + for [expect, found, _] in [[EXPECTED_ATOMS_ACID_INTERACTIONS, + self.interaction_atoms_for_acids, 'acid'], + [EXPECTED_ATOMS_BASE_INTERACTIONS, + self.interaction_atoms_for_bases, 'base']]: if self.type in expect.keys(): - for e in expect[self.type].keys(): - if len([a for a in found if a.element==e]) != expect[self.type][e]: + for elem in expect[self.type].keys(): + if len([a for a in found if a.element == elem]) \ + != expect[self.type][elem]: ok = False - if not ok: - warning('Missing atoms or failed protonation for %s (%s) -- please check the structure' % (self.label, self.type)) + str_ = 'Missing atoms or failed protonation for ' + str_ += '%s (%s) -- please check the structure' % (self.label, + self.type) + warning(str_) warning('%s' % self) - Na = sum([expected_atoms_acid_interactions[self.type][e] for e in expected_atoms_acid_interactions[self.type].keys()]) - Nb = sum([expected_atoms_base_interactions[self.type][e] for e in expected_atoms_base_interactions[self.type].keys()]) - - warning('Expected %d interaction atoms for acids, found:' % Na) + num_acid = sum([EXPECTED_ATOMS_ACID_INTERACTIONS[self.type][e] \ + for e in EXPECTED_ATOMS_ACID_INTERACTIONS[self.type].keys()]) + num_base = sum([EXPECTED_ATOMS_BASE_INTERACTIONS[self.type][e] \ + for e in EXPECTED_ATOMS_BASE_INTERACTIONS[self.type].keys()]) + warning('Expected %d interaction atoms for acids, found:' % num_acid) for i in range(len(self.interaction_atoms_for_acids)): - warning(' %s' % self.interaction_atoms_for_acids[i]) - - warning('Expected %d interaction atoms for bases, found:' % Nb) + warning(' %s' % self.interaction_atoms_for_acids[i]) + warning('Expected %d interaction atoms for bases, found:' % num_base) for i in range(len(self.interaction_atoms_for_bases)): - warning(' %s' % self.interaction_atoms_for_bases[i]) - - - #return - - return + warning(' %s' % self.interaction_atoms_for_bases[i]) def get_interaction_atoms(self, interacting_group): + """Get atoms involved in interaction with other group. + + Args: + interacting_group: other group + Returns: + list of atoms + """ if interacting_group.residue_type in self.parameters.base_list: return self.interaction_atoms_for_bases else: - return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0 + # default is acid interaction atoms - cf. 3.0 + return self.interaction_atoms_for_acids def set_center(self, atoms): + """Set center of group based on atoms. + + Args: + atoms: list of atoms + """ if not atoms: raise ValueError("At least one atom must be specified") - # reset center - self.x = 0.0; self.y = 0.0; self.z = 0.0 - - # find the average positon of atoms + self.x = 0.0 + self.y = 0.0 + self.z = 0.0 + # find the average position of atoms for atom in atoms: - self.x += atom.x; self.y += atom.y; self.z += atom.z - + self.x += atom.x + self.y += atom.y + self.z += atom.z self.x /= float(len(atoms)) self.y /= float(len(atoms)) self.z /= float(len(atoms)) - return + def get_determinant_string(self, remove_penalised_group=False): + """Create a string to identify this determinant. - def getDeterminantString(self, remove_penalised_group=False): + Args: + remove_penalised_group: Boolean flag to remove penalized groups + Returns: + string + """ if self.coupled_titrating_group and remove_penalised_group: return '' - number_of_sidechain = len(self.determinants['sidechain']) - number_of_backbone = len(self.determinants['backbone']) - number_of_coulomb = len(self.determinants['coulomb']) - - number_of_lines = max(1, number_of_sidechain, number_of_backbone, number_of_coulomb) - str = "" + number_of_backbone = len(self.determinants['backbone']) + number_of_coulomb = len(self.determinants['coulomb']) + number_of_lines = max(1, number_of_sidechain, number_of_backbone, + number_of_coulomb) + str_ = "" for line_number in range(number_of_lines): - str += "%s" % (self.label) + str_ += "%s" % (self.label) if line_number == 0: - str += " %6.2lf" %(self.pka_value) - if len(self.non_covalently_coupled_groups)>0: - str+='*' + str_ += " %6.2lf" %(self.pka_value) + if len(self.non_covalently_coupled_groups) > 0: + str_ += '*' else: - str+=' ' - - # if self.atom.cysteine_bridge: - # str += " BONDED " - # else: - str += " %4d%2s " % (int(100.0*self.buried), "%") - - str += " %6.2lf %4d" % (self.Emass, self.Nmass) - str += " %6.2lf %4d" % (self.Elocl, self.Nlocl) + str_ += ' ' + str_ += " %4d%2s " % (int(100.0*self.buried), "%") + str_ += " %6.2lf %4d" % (self.energy_volume, self.num_volume) + str_ += " %6.2lf %4d" % (self.energy_local, self.num_local) else: - str += "%40s" % (" ") - + str_ += "%40s" % (" ") # add the determinants - for type in ['sidechain','backbone','coulomb']: - str += self.get_determinant_for_string(type,line_number) - + for type_ in ['sidechain', 'backbone', 'coulomb']: + str_ += self.get_determinant_for_string(type_, line_number) # adding end-of-line - str += "\n" + str_ += "\n" + str_ += "\n" + return str_ - str += "\n" + def get_determinant_for_string(self, type_, number): + """Return a string describing determinant. - return str - - def get_determinant_for_string(self, type, number): - if number >= len(self.determinants[type]): + Args: + type_: determinant type + number: determinant index number + Returns: + string + """ + if number >= len(self.determinants[type_]): empty_determinant = "%s%4d%2s" % ("XXX", 0, "X") return "%8.2lf %s" % (0.0, empty_determinant) else: - determinant = self.determinants[type][number] + determinant = self.determinants[type_][number] return "%8.2lf %s" % (determinant.value, determinant.label) - def calculate_total_pka(self): + """Calculate total pKa based on determinants associated with this + group.""" # if this is a cysteine involved in a di-sulphide bond if self.atom.cysteine_bridge: self.pka_value = 99.99 return - - - self.pka_value = self.model_pka + self.Emass + self.Elocl - + self.pka_value = self.model_pka + self.energy_volume + self.energy_local for determinant_type in ['sidechain', 'backbone', 'coulomb']: for determinant in self.determinants[determinant_type]: self.pka_value += determinant.value - return - - - - def calculate_intrinsic_pka(self): + """Calculate the intrinsic pKa values from the desolvation + determinants, back-bone hydrogen bonds, and side-chain hydrogen bonds + to non-titratable residues. """ - Calculates the intrinsic pKa values from the desolvation determinants, back-bone hydrogen bonds, - and side-chain hydrogen bond to non-titratable residues - """ - back_bone = 0.0 + back_bone = 0.0 for determinant in self.determinants['backbone']: value = determinant.value back_bone += value - side_chain = 0.0 for determinant in self.determinants['sidechain']: - if determinant.label[0:3] not in ['ASP','GLU','LYS','ARG','HIS','CYS','TYR','C- ','N+ ']: + if determinant.label[0:3] not in ['ASP', 'GLU', 'LYS', 'ARG', + 'HIS', 'CYS', 'TYR', 'C- ', + 'N+ ']: value = determinant.value side_chain += value + self.intrinsic_pka = self.model_pka + self.energy_volume \ + + self.energy_local + back_bone + side_chain - self.intrinsic_pKa = self.model_pka + self.Emass + self.Elocl + back_bone + side_chain + def get_summary_string(self, remove_penalised_group=False): + """Create summary string for this group. - return - - - - - - def getSummaryString(self, remove_penalised_group=False): + Args: + remove_penalised_group: Boolean to ignore penalized groups + Returns: + string + """ if self.coupled_titrating_group and remove_penalised_group: return '' - ligand_type = '' if self.atom.type == 'hetatm': ligand_type = self.type - penalty = '' if self.coupled_titrating_group: - penalty = ' NB: Discarded due to coupling with %s'%self.coupled_titrating_group.label - - str = " %9s %8.2lf %10.2lf %18s %s\n" % (self.label, - self.pka_value, - self.model_pka,ligand_type, - penalty) - - return str + penalty = ' NB: Discarded due to coupling with %s' \ + % self.coupled_titrating_group.label + str_ = " %9s %8.2lf %10.2lf %18s %s\n" % (self.label, + self.pka_value, + self.model_pka, + ligand_type, + penalty) + return str_ def __str__(self): - return 'Group (%s) for %s'%(self.type,self.atom) - - - - # - # Energy-related methods - # + return 'Group (%s) for %s' % (self.type, self.atom) def calculate_folding_energy(self, parameters, ph=None, reference=None): + """Return the electrostatic energy of this residue at specified pH. + + Args: + parameters: parameters for energy calculation + ph: pH value for calculation + reference: reference state for calculation + Returns: + float describing energy """ - returning the electrostatic energy of this residue at pH 'pH' - """ - if ph == None: - pH = parameters.pH - if reference == None: + if ph is None: + ph = parameters.pH + if reference is None: reference = parameters.reference - # If not titratable, the contribution is zero - if not self.titratable: return 0.00 - - # calculating the ddG(neutral --> low-pH) contribution - ddG_neutral = 0.00 + # calculating the ddg(neutral --> low-pH) contribution + ddg_neutral = 0.00 if reference == 'neutral' and self.charge > 0.00: pka_prime = self.pka_value for determinant in self.determinants['coulomb']: if determinant.value > 0.00: pka_prime -= determinant.value - ddG_neutral = -1.36*(pka_prime - self.model_pka) - - # calculating the ddG(low-pH --> pH) contribution + ddg_neutral = UNK_PKA_SCALING*(pka_prime - self.model_pka) + # calculating the ddg(low-pH --> pH) contribution # folded - x = ph - self.pka_value - y = 10**x - Q_pro = math.log10(1+y) - + dpka = ph - self.pka_value + conc_ratio = 10**dpka + q_pro = math.log10(1+conc_ratio) # unfolded - x = ph - self.model_pka - y = 10**x - Q_mod = math.log10(1+y) + dpka = ph - self.model_pka + conc_ratio = 10**dpka + q_mod = math.log10(1+conc_ratio) + ddg_low = UNK_PKA_SCALING*(q_pro - q_mod) + ddg = ddg_neutral + ddg_low + return ddg - ddG_low = -1.36*(Q_pro - Q_mod) - ddG = ddG_neutral + ddG_low - - return ddG - - def calculate_charge(self, parmaeters, ph=7.0, state='folded'): + def calculate_charge(self, _, ph=7.0, state='folded'): + """Calculate the charge of the specified state at the specified pH. + Args: + _: parameters for calculation + ph: pH value + state: "folded" or "unfolded" + Returns: + float with charge + """ if state == "unfolded": - x = self.charge * (self.model_pka - ph) + q_dpka = self.charge * (self.model_pka - ph) else: - x = self.charge * (self.pka_value - ph) - - y = 10**x - charge = self.charge*(y/(1.0+y)) - + q_dpka = self.charge * (self.pka_value - ph) + conc_ratio = 10**q_dpka + charge = self.charge*(conc_ratio/(1.0+conc_ratio)) return charge def use_in_calculations(self): - """ - Whether this group should be included in the results report. If - --titrate_only option is specified, only residues that are titratable - and are in that list are included; otherwise all titratable residues - and CYS residues are included. + """Indicate whether group should be included in results report. + + If --titrate_only option is specified, only residues that are + titratable and are in that list are included; otherwise all titratable + residues and CYS residues are included. """ return self.titratable or (self.residue_type == 'CYS' and \ not self.exclude_cys_from_results) -class COO_group(Group): +class COOGroup(Group): + """Carboxyl group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'COO' def setup_atoms(self): + """Set up group.""" # Identify the two caroxyl oxygen atoms the_oxygens = self.atom.get_bonded_elements('O') - # set the center using the two oxygen carboxyl atoms (if present) if the_oxygens: self.set_center(the_oxygens) else: self.set_center([self.atom]) - # FIXME perhaps it would be better to ignore this group completely + # TODO - perhaps it would be better to ignore this group completely # if the oxygen is missing from this residue? - self.set_interaction_atoms(the_oxygens, the_oxygens) - return -class HIS_group(Group): +class HISGroup(Group): + """Histidine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'HIS' def setup_atoms(self): + """Set up atoms in group.""" # Find the atoms in the histidine ring ring_atoms = propka.ligand.is_ring_member(self.atom) if len(ring_atoms) != 5: warning('His group does not seem to contain a ring', self) - # protonate ring - for r in ring_atoms: - my_protonator.protonate_atom(r) - + for ring_atom in ring_atoms: + PROTONATOR.protonate_atom(ring_atom) # set the center using the ring atoms if ring_atoms: self.set_center(ring_atoms) else: # Missing side-chain atoms self.set_center([self.atom]) - # FIXME perhaps it would be better to ignore this group completely? - + # TODO - perhaps it would be better to ignore this group completely? # find the hydrogens on the ring-nitrogens hydrogens = [] nitrogens = [ra for ra in ring_atoms if ra.element == 'N'] - for nitrogen in nitrogens: hydrogens.extend(nitrogen.get_bonded_elements('H')) - self.set_interaction_atoms(hydrogens+nitrogens, nitrogens) - return +class CYSGroup(Group): + """Cysteine group.""" -class CYS_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'CYS' -class TYR_group(Group): +class TYRGroup(Group): + """Tyrosine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'TYR' -class LYS_group(Group): +class LYSGroup(Group): + """Lysine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'LYS' -class ARG_group(Group): +class ARGGroup(Group): + """Arginine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'ARG' def setup_atoms(self): + """Set up group.""" # set the center at the position of the main atom self.set_center([self.atom]) - # find all the hydrogens on the nitrogen atoms nitrogens = self.atom.get_bonded_elements('N') - for n in nitrogens: - my_protonator.protonate_atom(n) - + for nitrogen in nitrogens: + PROTONATOR.protonate_atom(nitrogen) hydrogens = [] for nitrogen in nitrogens: hydrogens.extend(nitrogen.get_bonded_elements('H')) self.set_interaction_atoms(nitrogens+hydrogens, nitrogens) - return +class ROHGroup(Group): + """Alcohol group.""" -class ROH_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'ROH' -class SER_group(Group): + +class SERGroup(Group): + """Serine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'SER' -class AMD_group(Group): + +class AMDGroup(Group): + """Amide group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'AMD' def setup_atoms(self): + """Setup group.""" # Identify the oxygen and nitrogen amide atoms the_oxygen = self.atom.get_bonded_elements('O') the_nitrogen = self.atom.get_bonded_elements('N') - # add protons to the nitrogen - my_protonator.protonate_atom(the_nitrogen[0]) + PROTONATOR.protonate_atom(the_nitrogen[0]) the_hydrogens = the_nitrogen[0].get_bonded_elements('H') - # set the center using the oxygen and nitrogen amide atoms self.set_center(the_oxygen+the_nitrogen) - - self.set_interaction_atoms(the_nitrogen+the_hydrogens,the_oxygen) - - return + self.set_interaction_atoms(the_nitrogen + the_hydrogens, the_oxygen) -class TRP_group(Group): +class TRPGroup(Group): + """Tryptophan group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'TRP' def setup_atoms(self): + """Set up atoms in group.""" # set the center at the position of the main atom self.set_center([self.atom]) - # find the hydrogen on the nitrogen atom - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') self.set_interaction_atoms(the_hydrogen+[self.atom], [self.atom]) - return -class Nterm_group(Group): +class NtermGroup(Group): + """N-terminus group.""" def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N+' -class Cterm_group(Group): +class CtermGroup(Group): + """C-terminus group.""" def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'COO' # this is to deal with the COO-C- parameter unification. def setup_atoms(self): + """Set up atoms in group.""" # Identify the carbon and other oxygen carboxyl atoms the_carbons = self.atom.get_bonded_elements('C') if not the_carbons: self.set_center([self.atom]) - # FIXME perhaps it would be better to ignore this group completely + # TODO - perhaps it would be better to ignore this group completely # if the carbon is missing from this residue? else: the_other_oxygen = the_carbons[0].get_bonded_elements('O') the_other_oxygen.remove(self.atom) - # set the center and interaction atoms the_oxygens = [self.atom]+ the_other_oxygen self.set_center(the_oxygens) self.set_interaction_atoms(the_oxygens, the_oxygens) - return +class BBNGroup(Group): + """Backbone nitrogen group.""" - -class BBN_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'BBN' self.residue_type = 'BBN' - def setup_atoms(self): + """Set up atoms in group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return -class BBC_group(Group): +class BBCGroup(Group): + """Backbone carbon group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'BBC' self.residue_type = 'BBC' def setup_atoms(self): + """Set up atoms in group.""" # Identify the oxygen the_oxygen = self.atom.get_bonded_elements('O') - # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_oxygen, the_oxygen) - return -class NAR_group(Group): +class NARGroup(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'NAR' self.residue_type = 'NAR' info('Found NAR group:', atom) - return - def setup_atoms(self): + """Set up atoms in group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return +class NAMGroup(Group): + """Unknown group. + TODO - identify this group. + """ - -class NAM_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'NAM' self.residue_type = 'NAM' info('Found NAM group:', atom) - return - def setup_atoms(self): + """Set up atoms in this group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return +class FGroup(Group): + """Fluoride group.""" -class F_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'F' self.residue_type = 'F' info('Found F group:', atom) - return -class Cl_group(Group): + +class ClGroup(Group): + """Chloride group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'Cl' self.residue_type = 'Cl' info('Found Cl group:', atom) - return -class OH_group(Group): + +class OHGroup(Group): + """Hydroxide group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'OH' self.residue_type = 'OH' info('Found OH group:', atom) - return - def setup_atoms(self): + """Set up atoms in this group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return -class OP_group(Group): + +class OPGroup(Group): + """Phosphate group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'OP' self.residue_type = 'OP' info('Found OP group:', atom) - return - def setup_atoms(self): + """Set up atoms in this group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) - #the_hydrogen = self.atom.get_bonded_elements('H') - + PROTONATOR.protonate_atom(self.atom) # set the center using the oxygen self.set_center([self.atom]) #self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) self.set_interaction_atoms([self.atom], [self.atom]) - return -class O3_group(Group): +class O3Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'O3' self.residue_type = 'O3' info('Found O3 group:', atom) - return -class O2_group(Group): +class O2Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'O2' self.residue_type = 'O2' info('Found O2 group:', atom) - return -class SH_group(Group): + +class SHGroup(Group): + """Sulfhydryl group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'SH' self.residue_type = 'SH' info('Found SH group:', atom) - return -class CG_group(Group): - """Guadinium group""" +class CGGroup(Group): + """Guadinium group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'CG' self.residue_type = 'CG' info('Found CG group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens the_nitrogens = self.atom.get_bonded_elements('N') - # set the center using the nitrogen self.set_center([self.atom]) - the_hydrogens = [] - for n in the_nitrogens: - my_protonator.protonate_atom(n) - the_hydrogens += n.get_bonded_elements('H') + for nitrogen in the_nitrogens: + PROTONATOR.protonate_atom(nitrogen) + the_hydrogens += nitrogen.get_bonded_elements('H') self.set_interaction_atoms(the_hydrogens+the_nitrogens, the_nitrogens) - return -class C2N_group(Group): - """Amidinium group""" +class C2NGroup(Group): + """Amidinium group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'C2N' self.residue_type = 'C2N' info('Found C2N group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens the_nitrogens = self.atom.get_bonded_elements('N') - the_nitrogens = [n for n in the_nitrogens if len(n.get_bonded_heavy_atoms())==1] - + the_nitrogens = [n for n in the_nitrogens \ + if len(n.get_bonded_heavy_atoms()) == 1] # set the center using the nitrogen self.set_center([self.atom]) - the_hydrogens = [] - for n in the_nitrogens: - my_protonator.protonate_atom(n) - the_hydrogens += n.get_bonded_elements('H') - + for nitrogen in the_nitrogens: + PROTONATOR.protonate_atom(nitrogen) + the_hydrogens += nitrogen.get_bonded_elements('H') self.set_interaction_atoms(the_hydrogens+the_nitrogens, the_nitrogens) - return -class OCO_group(Group): + +class OCOGroup(Group): + """Carboxyl group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'OCO' self.residue_type = 'OCO' info('Found OCO group:', atom) - return def setup_atoms(self): - # Identify the two caroxyl oxygen atoms + """Set up atoms in group.""" + # Identify the two carboxyl oxygen atoms the_oxygens = self.atom.get_bonded_elements('O') - # set the center using the two oxygen carboxyl atoms self.set_center(the_oxygens) self.set_interaction_atoms(the_oxygens, the_oxygens) - return +class N30Group(Group): + """Unknown group. + + TODO - identify this group. + """ -class N30_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N30' self.residue_type = 'N30' info('Found N30 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N31_group(Group): + +class N31Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N31' self.residue_type = 'N31' info('Found N31 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N32_group(Group): + +class N32Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N32' self.residue_type = 'N32' info('Found N32 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N33_group(Group): + +class N33Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N33' self.residue_type = 'N33' info('Found N33 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class NP1_group(Group): + +class NP1Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'NP1' self.residue_type = 'NP1' info('Found NP1 group:', atom) - return - def setup_atoms(self): + """Set up atoms in group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N1_group(Group): + +class N1Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N1' self.residue_type = 'N1' info('Found N1 group:', atom) - return +class IonGroup(Group): + """Ion group.""" -class Ion_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'ION' self.residue_type = atom.res_name.strip() info('Found ion group:', atom) - return -class non_titratable_ligand_group(Group): +class NonTitratableLigandGroup(Group): + """Non-titratable ligand group.""" + def __init__(self, atom): Group.__init__(self, atom) self.type = 'LG' self.residue_type = 'LG' -# info('Non-titratable ligand group',atom) - return -class titratable_ligand_group(Group): + +class TitratableLigandGroup(Group): + """Titratable ligand group.""" + def __init__(self, atom): Group.__init__(self, atom) # set the charge and determine type (acid or base) self.charge = atom.charge - if self.charge <0: + if self.charge < 0: self.type = 'ALG' self.residue_type = 'ALG' elif self.charge > 0: @@ -1148,8 +1199,6 @@ class titratable_ligand_group(Group): self.residue_type = 'BLG' else: raise Exception('Unable to determine type of ligand group - charge not set?') - - # check if marvin model pka has been calculated # this is not true if we are reading an input file if atom.marvin_pka: @@ -1157,192 +1206,196 @@ class titratable_ligand_group(Group): info('Titratable ligand group ', atom, self.model_pka, self.charge) self.model_pka_set = True - return - def is_group(parameters, atom): - atom.groups_extracted = 1 + """Identify whether the atom belongs to a group. + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ + atom.groups_extracted = 1 # check if this atom belongs to a protein group protein_group = is_protein_group(parameters, atom) - if protein_group: return protein_group - + if protein_group: + return protein_group # check if this atom belongs to a ion group ion_group = is_ion_group(parameters, atom) - if ion_group: return ion_group - + if ion_group: + return ion_group # check if this atom belongs to a ligand group if parameters.ligand_typing == 'marvin': ligand_group = is_ligand_group_by_marvin_pkas(parameters, atom) elif parameters.ligand_typing == 'sybyl': - ligand_group = is_ligand_group_by_sybyl_types(parameters, atom) + ligand_group = None elif parameters.ligand_typing == 'groups': ligand_group = is_ligand_group_by_groups(parameters, atom) else: - raise Exception('Unknown ligand typing method \'%s\''%parameters.ligand_typing) - - if ligand_group: return ligand_group - - - + errstr = 'Unknown ligand typing method \'%s\'' % parameters.ligand_typing + raise Exception(errstr) + if ligand_group: + return ligand_group return None -def is_protein_group(parameters,atom): +def is_protein_group(parameters, atom): + """Identify whether the atom belongs to a protein group. + + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ if atom.type != 'atom': return None - ### Check for termial groups if atom.terminal == 'N+': - return Nterm_group(atom) + return NtermGroup(atom) elif atom.terminal == 'C-': - return Cterm_group(atom) - + return CtermGroup(atom) ### Backbone if atom.type == 'atom' and atom.name == 'N': # ignore proline backbone nitrogens if atom.res_name != 'PRO': - return BBN_group(atom) + return BBNGroup(atom) if atom.type == 'atom' and atom.name == 'C': # ignore C- carboxyl if atom.count_bonded_elements('O') == 1: - return BBC_group(atom) - + return BBCGroup(atom) ### Filters for side chains based on PDB protein atom names - key = '%s-%s'%(atom.res_name, atom.name) - + key = '%s-%s' % (atom.res_name, atom.name) if key in parameters.protein_group_mapping.keys(): - return eval('%s_group(atom)'%parameters.protein_group_mapping[key]) - - return None - -def is_ligand_group_by_sybyl_types(parameters, atom): - - + class_str = "%sGroup" % parameters.protein_group_mapping[key] + group_class = globals()[class_str] + return group_class(atom) return None -def is_ligand_group_by_groups(parameters, atom): +def is_ligand_group_by_groups(_, atom): + """Identify whether the atom belongs to a ligand group by checking groups. + + Args: + _: parameters for check + atom: atom to check + Returns: + group for atom or None + """ ### Ligand group filters if atom.type != 'hetatm': return None - - my_protonator.protonate_atom(atom) - + PROTONATOR.protonate_atom(atom) if atom.sybyl_type == 'N.ar': - if len(atom.get_bonded_heavy_atoms())==2: - return NAR_group(atom) - + if len(atom.get_bonded_heavy_atoms()) == 2: + return NARGroup(atom) if atom.sybyl_type == 'N.am': - return NAM_group(atom) - + return NAMGroup(atom) if atom.sybyl_type in ['N.3', 'N.4']: heavy_bonded = atom.get_bonded_heavy_atoms() if len(heavy_bonded) == 0: - return N30_group(atom) + return N30Group(atom) elif len(heavy_bonded) == 1: - return N31_group(atom) + return N31Group(atom) elif len(heavy_bonded) == 2: - return N32_group(atom) + return N32Group(atom) elif len(heavy_bonded) == 3: - return N33_group(atom) - + return N33Group(atom) if atom.sybyl_type == 'N.1': - return N1_group(atom) - + return N1Group(atom) if atom.sybyl_type == 'N.pl3': # make sure that this atom is not part of a guadinium or amidinium group bonded_carbons = atom.get_bonded_elements('C') if len(bonded_carbons) == 1: bonded_nitrogens = bonded_carbons[0].get_bonded_elements('N') if len(bonded_nitrogens) == 1: - return NP1_group(atom) - - + return NP1Group(atom) if atom.sybyl_type == 'C.2': # Guadinium and amidinium groups bonded_nitrogens = atom.get_bonded_elements('N') - npls = [n for n in bonded_nitrogens if (n.sybyl_type == 'N.pl3' and len(n.get_bonded_heavy_atoms())==1)] + npls = [n for n in bonded_nitrogens if (n.sybyl_type == 'N.pl3' \ + and len(n.get_bonded_heavy_atoms()) == 1)] if len(npls) == 2: - n_with_max_two_heavy_atom_bonds = [n for n in bonded_nitrogens if len(n.get_bonded_heavy_atoms())<3] + n_with_max_two_heavy_atom_bonds = [n for n in bonded_nitrogens \ + if len(n.get_bonded_heavy_atoms()) < 3] if len(n_with_max_two_heavy_atom_bonds) == 2: - return C2N_group(atom) + return C2NGroup(atom) if len(bonded_nitrogens) == 3: - return CG_group(atom) + return CGGroup(atom) # carboxyl group bonded_oxygens = atom.get_bonded_elements('O') bonded_oxygens = [b for b in bonded_oxygens if 'O.co2' in b.sybyl_type] if len(bonded_oxygens) == 2: - return OCO_group(atom) - - + return OCOGroup(atom) if atom.sybyl_type == 'F': - return F_group(atom) - + return FGroup(atom) if atom.sybyl_type == 'Cl': - return Cl_group(atom) - + return ClGroup(atom) if atom.sybyl_type == 'O.3': if len(atom.get_bonded_heavy_atoms()) == 1: # phosphate group if atom.count_bonded_elements('P') == 1: - return OP_group(atom) + return OPGroup(atom) # hydroxyl group else: - return OH_group(atom) + return OHGroup(atom) # other SP3 oxygen else: - return O3_group(atom) - + return O3Group(atom) if atom.sybyl_type == 'O.2': - return O2_group(atom) - - + return O2Group(atom) if atom.sybyl_type == 'S.3': # sulphide group if len(atom.get_bonded_heavy_atoms()) == 1: - return SH_group(atom) - # other SP3 oxygen - #else: - # return S3_group(atom) - - + return SHGroup(atom) return None def is_ligand_group_by_marvin_pkas(parameters, atom): + """Identify whether the atom belongs to a ligand group by calculating + 'Marvin pKas'. + + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ if atom.type != 'hetatm': return None - # calculate Marvin ligand pkas for this conformation container # if not already done + # TODO - double-check testing coverage of these functions. if not atom.conformation_container.marvin_pkas_calculated: - lpka = propka.ligand_pka_values.ligand_pka_values(parameters) + lpka = ligand_pka_values(parameters) lpka.get_marvin_pkas_for_molecular_container(atom.molecular_container, - min_ph=parameters.min_ligand_model_pka, - max_ph=parameters.max_ligand_model_pka) - - + min_pH=parameters.min_ligand_model_pka, + max_pH=parameters.max_ligand_model_pka) if atom.marvin_pka: - return titratable_ligand_group(atom) - + return TitratableLigandGroup(atom) # Special case of oxygen in carboxyl group not assigned a pka value by marvin if atom.sybyl_type == 'O.co2': atom.charge = -1.0 - other_oxygen = [o for o in atom.get_bonded_elements('C')[0].get_bonded_elements('O') if not o==atom][0] + other_oxygen = [o for o \ + in atom.get_bonded_elements('C')[0].get_bonded_elements('O') \ + if not o == atom][0] atom.marvin_pka = other_oxygen.marvin_pka - return titratable_ligand_group(atom) - - + return TitratableLigandGroup(atom) if atom.element in parameters.hydrogen_bonds.elements: - return non_titratable_ligand_group(atom) - + return NonTitratableLigandGroup(atom) return None def is_ion_group(parameters, atom): + """Identify whether the atom belongs to an ion group. + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ if atom.res_name.strip() in parameters.ions.keys(): - return Ion_group(atom) - + return IonGroup(atom) return None diff --git a/propka/iterative.py b/propka/iterative.py index 132d42b..e3bebf9 100644 --- a/propka/iterative.py +++ b/propka/iterative.py @@ -339,8 +339,8 @@ class Iterative: coulomb += value self.pKa_NonIterative = group.model_pka - self.pKa_NonIterative += group.Emass - self.pKa_NonIterative += group.Elocl + self.pKa_NonIterative += group.energy_volume + self.pKa_NonIterative += group.energy_local self.pKa_NonIterative += side_chain self.pKa_NonIterative += back_bone self.pKa_NonIterative += coulomb diff --git a/propka/output.py b/propka/output.py index 531af02..6e15d32 100644 --- a/propka/output.py +++ b/propka/output.py @@ -133,7 +133,7 @@ def getDeterminantSection(protein, conformation, parameters): groups = [g for g in protein.conformations[conformation].groups if g.atom.chain_id == chain] for group in groups: if group.residue_type == residue_type: - str += "%s" % ( group.getDeterminantString(parameters.remove_penalised_group) ) + str += "%s" % ( group.get_determinant_string(parameters.remove_penalised_group) ) # Add a warning in case of coupled residues if protein.conformations[conformation].non_covalently_coupled_groups and not protein.options.display_coupled_residues: @@ -151,7 +151,7 @@ def getSummarySection(protein, conformation, parameters): for residue_type in parameters.write_out_order: for group in protein.conformations[conformation].groups: if group.residue_type == residue_type: - str += "%s" % ( group.getSummaryString(parameters.remove_penalised_group) ) + str += "%s" % ( group.get_summary_string(parameters.remove_penalised_group) ) return str diff --git a/propka/parameters.py b/propka/parameters.py index 143a59b..90e47b5 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -33,7 +33,7 @@ parameters = ['Nmin','Nmax','desolvationSurfaceScalingFactor','desolvat 'include_H_in_interactions','coupling_max_number_of_bonds', 'min_bond_distance_for_hydrogen_bonds','coupling_penalty', 'shared_determinants','common_charge_centre','hide_penalised_group', 'remove_penalised_group', - 'max_intrinsic_pKa_diff','min_interaction_energy','max_free_energy_diff','min_swap_pka_shift', + 'max_intrinsic_pka_diff','min_interaction_energy','max_free_energy_diff','min_swap_pka_shift', 'min_pka','max_pka','sidechain_interaction'] strings = ['version','output_file_tag','ligand_typing','pH','reference'] diff --git a/propka/propka.cfg b/propka/propka.cfg index a12e38a..58b1aec 100644 --- a/propka/propka.cfg +++ b/propka/propka.cfg @@ -342,7 +342,7 @@ common_charge_centre 0 remove_penalised_group 1 # non-covalent coupling -max_intrinsic_pKa_diff 2.0 +max_intrinsic_pka_diff 2.0 min_interaction_energy 0.5 max_free_energy_diff 1.0 min_swap_pka_shift 1.0 diff --git a/propka/version.py b/propka/version.py index 861fc5b..86c4268 100644 --- a/propka/version.py +++ b/propka/version.py @@ -18,8 +18,8 @@ class version: def calculate_desolvation(self, group): return self.desolvation_model(self.parameters, group) - def calculate_pair_weight(self, Nmass1, Nmass2): - return self.weight_pair_method(self.parameters, Nmass1, Nmass2) + def calculate_pair_weight(self, num_volume1, num_volume2): + return self.weight_pair_method(self.parameters, num_volume1, num_volume2) # side chains def hydrogen_bond_interaction(self, group1, group2): From 95e132e5209c8e5ce1b01581eb041c21f65cc41b Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 11:21:55 -0700 Subject: [PATCH 15/65] De-lint hybrid36.py --- propka/hybrid36.py | 35 +++++++++++++++----------- tests/test_hybrid36.py | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 15 deletions(-) create mode 100644 tests/test_hybrid36.py diff --git a/propka/hybrid36.py b/propka/hybrid36.py index e0af2dc..67f2c0c 100644 --- a/propka/hybrid36.py +++ b/propka/hybrid36.py @@ -4,16 +4,21 @@ http://cci.lbl.gov/hybrid_36/ """ import string -_hybrid36_upper_chars = set(string.ascii_uppercase) -_hybrid36_lower_chars = set(string.ascii_lowercase) -_hybrid36_digits = set(string.digits) -_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits -_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits + +_HYBRID36_UPPER_CHARS = set(string.ascii_uppercase) +_HYBRID36_LOWER_CHARS = set(string.ascii_lowercase) +_HYBRID36_DIGITS = set(string.digits) +_HYBRID36_UPPER_SET = _HYBRID36_UPPER_CHARS | _HYBRID36_DIGITS +_HYBRID36_LOWER_SET = _HYBRID36_LOWER_CHARS | _HYBRID36_DIGITS + def decode(input_string): - """ - Convert an input string of a number in hybrid-36 format to an integer. + """Convert an input string of a number in hybrid-36 format to an integer. + Args: + input_string: input string + Returns: + integer """ value_error_message = "invalid literal for hybrid-36 conversion: '%s'" @@ -27,7 +32,7 @@ def decode(input_string): else: sign = 1 - if not len(input_string): + if len(input_string) == 0: raise ValueError(value_error_message % input_string) # See http://cci.lbl.gov/hybrid_36/ for documentation on the format. @@ -35,21 +40,21 @@ def decode(input_string): num_chars = len(input_string) first_char = input_string[0] - if first_char in _hybrid36_digits: + if first_char in _HYBRID36_DIGITS: return sign * int(input_string) - elif first_char in _hybrid36_upper_chars: + elif first_char in _HYBRID36_UPPER_CHARS: reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars) - _hybrid36_set = _hybrid36_upper_set - elif first_char in _hybrid36_lower_chars: + _hybrid36_set = _HYBRID36_UPPER_SET + elif first_char in _HYBRID36_LOWER_CHARS: reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars) - _hybrid36_set = _hybrid36_lower_set + _hybrid36_set = _HYBRID36_LOWER_SET else: raise ValueError(value_error_message % original_input_string) # Check the validity of the input string: ASCII characters should be # either all uppercase or all lowercase. - for c in input_string[1:]: - if c not in _hybrid36_set: + for char in input_string[1:]: + if char not in _hybrid36_set: raise ValueError(value_error_message % original_input_string) # Convert with the int function. diff --git a/tests/test_hybrid36.py b/tests/test_hybrid36.py new file mode 100644 index 0000000..1af74d0 --- /dev/null +++ b/tests/test_hybrid36.py @@ -0,0 +1,57 @@ +import unittest + +import propka.hybrid36 as hybrid36 + +class Hybrid36Test(unittest.TestCase): + def testDecode(self): + test_values = { + "99999": 99999, + "A0000": 100000, + "0": 0, + "9": 9, + "A": 10, + " ZZZZY": 43770014, + "ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000 + "a0000": 43770016, + "zzzzz": 87440031, + "zzzzy": 87440030, + "99": 99, + "A0": 100, + "ZZ": 1035, + "zz": 1971, + "-99999": -99999, + "-A0000": -100000, + "-0": 0, + "-9": -9, + "-A": -10, + "-ZZZZY": -43770014, + "-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000 + "-a0000": -43770016, + "-zzzzz": -87440031, + "-zzzzy": -87440030, + "-99": -99, + "-A0": -100, + "-ZZ": -1035, + "-zz": -1971, + "PROPKA": 954495146, + "A001Z": 100071, + "B0000": 1779616, + } + + for k, v in test_values.items(): + self.assertEqual(hybrid36.decode(k), v) + + def testErrors(self): + test_values = [ + "99X99", + "X9-99", + "XYZa", + "", + "-", + "!NotOk", + ] + + for v in test_values: + with self.assertRaises(ValueError) as e: + hybrid36.decode(v) + self.assertTrue(v in str(e.exception)) \ No newline at end of file From 04b52d458c64897e51265d3262639e4068045c04 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 12:05:48 -0700 Subject: [PATCH 16/65] De-lint iterative.py. --- propka/determinants.py | 5 +- propka/iterative.py | 438 ++++++++++++++++++++--------------------- 2 files changed, 220 insertions(+), 223 deletions(-) diff --git a/propka/determinants.py b/propka/determinants.py index 101547a..fa80763 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -42,15 +42,14 @@ def set_determinants(propka_groups, version=None, options=None): interaction_type = version.parameters.interaction_matrix.get_value(group1.type, group2.type) if interaction_type == 'I': - propka.iterative.addtoDeterminantList(group1, group2, + propka.iterative.add_to_determinant_list(group1, group2, distance, iterative_interactions, version=version) elif interaction_type == 'N': add_determinants(group1, group2, distance, version) # --- Iterative section ---# - propka.iterative.add_determinants(iterative_interactions, version, - options=options) + propka.iterative.add_determinants(iterative_interactions, version) def add_determinants(group1, group2, distance, version): diff --git a/propka/iterative.py b/propka/iterative.py index e3bebf9..d14d86f 100644 --- a/propka/iterative.py +++ b/propka/iterative.py @@ -1,60 +1,66 @@ +"""Iterative functions for pKa calculations. -from __future__ import division -from __future__ import print_function - -import math, time - -import propka.lib as lib +These appear to mostly involve determinants. +""" from propka.determinant import Determinant -import propka.calculations -from propka.lib import info, warning, debug - -# Some library functions for the interative pKa determinants +from propka.lib import info, debug -def addtoDeterminantList(group1, group2, distance, iterative_interactions, version): +# TODO - these are undocumented constants +UNK_MIN_VALUE = 0.005 + + +def add_to_determinant_list(group1, group2, distance, iterative_interactions, version): + """Add iterative determinantes to the list. + + [[R1, R2], [side-chain, coulomb], [A1, A2]], ... + + NOTE - the sign is determined when the interaction is added to the iterative object! + NOTE - distance < coulomb_cutoff here + + Args: + group1: first group in pair + group2: second group in pair + distance: distance between groups + iterative_interactions: interaction list to modify + version: version object """ - Adds 'iterative determinants' to list ..., [[R1, R2], [side-chain, coulomb], [A1, A2]], ... - Note, the sign is determined when the interaction is added to the iterative object! - Note, distance < coulomb_cutoff here - """ - - hbond_value = version.hydrogen_bond_interaction(group1, group2) + hbond_value = version.hydrogen_bond_interaction(group1, group2) coulomb_value = version.electrostatic_interaction(group1, group2, distance) - # adding the interaction to 'iterative_interactions' if hbond_value or coulomb_value: - pair = [group1, group2] - - values = [hbond_value, coulomb_value] + pair = [group1, group2] + values = [hbond_value, coulomb_value] while None in values: values[values.index(None)] = 0.0 - annihilation = [0., 0.] - interaction = [pair, values, annihilation] + interaction = [pair, values, annihilation] iterative_interactions.append(interaction) - return +def add_iterative_acid_pair(object1, object2, interaction): + """Add the Coulomb 'iterative' interaction (an acid pair). -def addIterativeAcidPair(object1, object2, interaction): + The higher pKa is raised with QQ+HB + The lower pKa is lowered with HB + + Args: + object1: first object in pair + object2: second object in pair + interaction: list with [values, annihilation] """ - Adding the Coulomb 'iterative' interaction (an acid pair): - the higher pKa is raised with QQ+HB - the lower pKa is lowered with HB - """ - values = interaction[1] + values = interaction[1] annihilation = interaction[2] - hbond_value = values[0] + hbond_value = values[0] coulomb_value = values[1] diff = coulomb_value + 2*hbond_value - comp1 = object1.pKa_old + annihilation[0] + diff - comp2 = object2.pKa_old + annihilation[1] + diff - annihilation[0] = 0. - annihilation[1] = 0. + comp1 = object1.pka_old + annihilation[0] + diff + comp2 = object2.pka_old + annihilation[1] + diff + annihilation[0] = 0.0 + annihilation[1] = 0.0 if comp1 > comp2: # side-chain - determinant = [object2, hbond_value] + determinant = [object2, hbond_value] object1.determinants['sidechain'].append(determinant) determinant = [object1, -hbond_value] object2.determinants['sidechain'].append(determinant) @@ -64,7 +70,7 @@ def addIterativeAcidPair(object1, object2, interaction): annihilation[0] = -diff else: # side-chain - determinant = [object1, hbond_value] + determinant = [object1, hbond_value] object2.determinants['sidechain'].append(determinant) determinant = [object2, -hbond_value] object1.determinants['sidechain'].append(determinant) @@ -74,26 +80,31 @@ def addIterativeAcidPair(object1, object2, interaction): annihilation[1] = -diff -def addIterativeBasePair(object1, object2, interaction): +def add_iterative_base_pair(object1, object2, interaction): + """Add the Coulomb 'iterative' interaction (a base pair). + + The lower pKa is lowered + + Args: + object1: first object in pair + object2: second object in pair + interaction: list with [values, annihilation] """ - Adding the Coulomb 'iterative' interaction (a base pair): - the lower pKa is lowered - """ - values = interaction[1] + values = interaction[1] annihilation = interaction[2] - hbond_value = values[0] + hbond_value = values[0] coulomb_value = values[1] diff = coulomb_value + 2*hbond_value diff = -diff - comp1 = object1.pKa_old + annihilation[0] + diff - comp2 = object2.pKa_old + annihilation[1] + diff - annihilation[0] = 0. - annihilation[1] = 0. + comp1 = object1.pka_old + annihilation[0] + diff + comp2 = object2.pka_old + annihilation[1] + diff + annihilation[0] = 0.0 + annihilation[1] = 0.0 if comp1 < comp2: # side-chain determinant = [object2, -hbond_value] object1.determinants['sidechain'].append(determinant) - determinant = [object1, hbond_value] + determinant = [object1, hbond_value] object2.determinants['sidechain'].append(determinant) # Coulomb determinant = [object2, -coulomb_value] @@ -103,7 +114,7 @@ def addIterativeBasePair(object1, object2, interaction): # side-chain determinant = [object1, -hbond_value] object2.determinants['sidechain'].append(determinant) - determinant = [object2, hbond_value] + determinant = [object2, hbond_value] object1.determinants['sidechain'].append(determinant) # Coulomb determinant = [object1, -coulomb_value] @@ -111,254 +122,241 @@ def addIterativeBasePair(object1, object2, interaction): annihilation[1] = -diff -def addIterativeIonPair(object1, object2, interaction, version): - """ - Adding the Coulomb 'iterative' interaction (an acid-base pair): +def add_iterative_ion_pair(object1, object2, interaction, version): + """Add the Coulomb 'iterative' interaction (an acid-base pair) + the pKa of the acid is lowered & the pKa of the base is raised + + Args: + object1: first object in pair + object2: second object in pair + interaction: list with [values, annihilation] + version: version object """ - values = interaction[1] + values = interaction[1] annihilation = interaction[2] - hbond_value = values[0] + hbond_value = values[0] coulomb_value = values[1] - Q1 = object1.Q - Q2 = object2.Q - comp1 = object1.pKa_old + annihilation[0] + Q1*coulomb_value - comp2 = object2.pKa_old + annihilation[1] + Q2*coulomb_value + q1 = object1.q + q2 = object2.q + comp1 = object1.pka_old + annihilation[0] + q1*coulomb_value + comp2 = object2.pka_old + annihilation[1] + q2*coulomb_value if object1.res_name not in version.parameters.exclude_sidechain_interactions: - comp1 += Q1*hbond_value + comp1 += q1*hbond_value if object2.res_name not in version.parameters.exclude_sidechain_interactions: - comp2 += Q2*hbond_value - - if Q1 == -1.0 and comp1 < comp2: - add_term = True # pKa(acid) < pKa(base) - elif Q1 == 1.0 and comp1 > comp2: - add_term = True # pKa(base) > pKa(acid) + comp2 += q2*hbond_value + if q1 == -1.0 and comp1 < comp2: + add_term = True # pKa(acid) < pKa(base) + elif q1 == 1.0 and comp1 > comp2: + add_term = True # pKa(base) > pKa(acid) else: - add_term = False - + add_term = False annihilation[0] = 0.00 annihilation[1] = 0.00 - - if add_term == True: - - # Coulomb - if coulomb_value > 0.005: - # residue1 - interaction = [object2, Q1*coulomb_value] - annihilation[0] += -Q1*coulomb_value - object1.determinants['coulomb'].append(interaction) - # residue2 - interaction = [object1, Q2*coulomb_value] - annihilation[1] += -Q2*coulomb_value - object2.determinants['coulomb'].append(interaction) - - # Side-chain - if hbond_value > 0.005: - # residue1 - if object1.res_name not in version.parameters.exclude_sidechain_interactions: - interaction = [object2, Q1*hbond_value] - annihilation[0] += -Q1*hbond_value - object1.determinants['sidechain'].append(interaction) - # residue2 - if object2.res_name not in version.parameters.exclude_sidechain_interactions: - interaction = [object1, Q2*hbond_value] - annihilation[1] += -Q2*hbond_value - object2.determinants['sidechain'].append(interaction) + if add_term: + # Coulomb + if coulomb_value > UNK_MIN_VALUE: + # residue1 + interaction = [object2, q1*coulomb_value] + annihilation[0] += -q1*coulomb_value + object1.determinants['coulomb'].append(interaction) + # residue2 + interaction = [object1, q2*coulomb_value] + annihilation[1] += -q2*coulomb_value + object2.determinants['coulomb'].append(interaction) + # Side-chain + if hbond_value > UNK_MIN_VALUE: + # residue1 + if object1.res_name not in version.parameters.exclude_sidechain_interactions: + interaction = [object2, q1*hbond_value] + annihilation[0] += -q1*hbond_value + object1.determinants['sidechain'].append(interaction) + # residue2 + if object2.res_name not in version.parameters.exclude_sidechain_interactions: + interaction = [object1, q2*hbond_value] + annihilation[1] += -q2*hbond_value + object2.determinants['sidechain'].append(interaction) -def add_determinants(iterative_interactions, version, options=None): - """ +def add_determinants(iterative_interactions, version, _=None): + """Add determinants iteratively. + The iterative pKa scheme. Later it is all added in 'calculateTotalPKA' + + Args: + iterative_interactions: list of iterative interactions + version: version object + _: options object """ # --- setup --- iteratives = [] done_group = [] - # creating iterative objects with references to their real group counterparts for interaction in iterative_interactions: pair = interaction[0] for group in pair: if group in done_group: - #print "done already" - """ do nothing - already have an iterative object for this group """ + # do nothing - already have an iterative object for this group + pass else: - newIterative = Iterative(group) - iteratives.append(newIterative) + new_iterative = Iterative(group) + iteratives.append(new_iterative) done_group.append(group) - # Initialize iterative scheme debug("\n --- pKa iterations (%d groups, %d interactions) ---" % (len(iteratives), len(iterative_interactions))) converged = False iteration = 0 # set non-iterative pka values as first step - for itres in iteratives: - itres.pKa_iter.append(itres.pKa_NonIterative) - - + for iter_ in iteratives: + iter_.pka_iter.append(iter_.pka_noniterative) # --- starting pKa iterations --- - while converged == False: + while not converged: + # initialize pka_new + iteration += 1 + for itres in iteratives: + itres.determinants = {'sidechain': [], 'backbone': [], + 'coulomb': []} + itres.pka_new = itres.pka_noniterative + # Adding interactions to temporary determinant container + for interaction in iterative_interactions: + pair = interaction[0] + object1, object2 = find_iterative(pair, iteratives) + q1 = object1.q + q2 = object2.q + if q1 < 0.0 and q2 < 0.0: + # both are acids + add_iterative_acid_pair(object1, object2, interaction) + elif q1 > 0.0 and q2 > 0.0: + # both are bases + add_iterative_base_pair(object1, object2, interaction) + else: + # one of each + add_iterative_ion_pair(object1, object2, interaction, version) + # Calculating pka_new values + for itres in iteratives: + for type_ in ['sidechain', 'backbone', 'coulomb']: + for determinant in itres.determinants[type_]: + itres.pka_new += determinant[1] - # initialize pKa_new - iteration += 1 - for itres in iteratives: - itres.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} - itres.pKa_new = itres.pKa_NonIterative + # Check convergence + converged = True + for itres in iteratives: + if itres.pka_new == itres.pka_old: + itres.converged = True + else: + itres.converged = False + converged = False + # reset pka_old & storing pka_new in pka_iter + for itres in iteratives: + itres.pka_old = itres.pka_new + itres.pka_iter.append(itres.pka_new) - # Adding interactions to temporary determinant container - for interaction in iterative_interactions: - pair = interaction[0] - values = interaction[1] - annihilation = interaction[2] - #print "len(interaction) = %d" % (len(interaction)) - object1, object2 = findIterative(pair, iteratives) - Q1 = object1.Q - Q2 = object2.Q - if Q1 < 0.0 and Q2 < 0.0: - """ both are acids """ - addIterativeAcidPair(object1, object2, interaction) - elif Q1 > 0.0 and Q2 > 0.0: - """ both are bases """ - addIterativeBasePair(object1, object2, interaction) - else: - """ one of each """ - addIterativeIonPair(object1, object2, interaction, version) - - - # Calculating pKa_new values - for itres in iteratives: - for type in ['sidechain','backbone','coulomb']: - for determinant in itres.determinants[type]: - itres.pKa_new += determinant[1] - - # Check convergence - converged = True - for itres in iteratives: - if itres.pKa_new == itres.pKa_old: - itres.converged = True - else: - itres.converged = False - converged = False - - # reset pKa_old & storing pKa_new in pKa_iter - for itres in iteratives: - itres.pKa_old = itres.pKa_new - itres.pKa_iter.append(itres.pKa_new) - - if iteration == 10: - info("did not converge in %d iterations" % (iteration)) - break - - # --- Iterations finished --- - + if iteration == 10: + info("did not converge in %d iterations" % (iteration)) + break # printing pKa iterations # formerly was conditioned on if options.verbosity >= 2 - now unnecessary - str = "%12s" % (" ") - for index in range(0, iteration+1 ): - str += "%8d" % (index) - debug(str) + str_ = "%12s" % (" ") + for index in range(iteration+1): + str_ += "%8d" % (index) + debug(str_) for itres in iteratives: - str = "%s " % (itres.label) - for pKa in itres.pKa_iter: - str += "%8.2lf" % (pKa) - if itres.converged == False: - str += " *" - debug(str) - + str_ = "%s " % (itres.label) + for pka in itres.pka_iter: + str_ += "%8.2lf" % (pka) + if not itres.converged: + str_ += " *" + debug(str_) # creating real determinants and adding them to group object for itres in iteratives: - for type in ['sidechain','backbone','coulomb']: - for interaction in itres.determinants[type]: + for type_ in ['sidechain', 'backbone', 'coulomb']: + for interaction in itres.determinants[type_]: #info('done',itres.group.label,interaction[0],interaction[1]) value = interaction[1] - if value > 0.005 or value < -0.005: - g = interaction[0] - newDeterminant = Determinant(g, value) - itres.group.determinants[type].append(newDeterminant) + if value > UNK_MIN_VALUE or value < -UNK_MIN_VALUE: + group = interaction[0] + new_det = Determinant(group, value) + itres.group.determinants[type_].append(new_det) +def find_iterative(pair, iteratives): + """Find the 'iteratives' that correspond to the groups in 'pair'. -def findIterative(pair, iteratives): - """ - Function to find the two 'iteratives' that corresponds to the groups in 'pair' + Args: + pair: groups to match + iteratives: list of iteratives to search + Returns: + 1. first matched iterative + 2. second matched iterative """ for iterative in iteratives: if iterative.group == pair[0]: iterative0 = iterative elif iterative.group == pair[1]: iterative1 = iterative - return iterative0, iterative1 - class Iterative: - """ - Iterative class - pKa values and references of iterative groups - Note, this class has a fake determinant list, true determinants are - made after the iterations are finished. + """Iterative class - pKa values and references of iterative groups. + + NOTE - this class has a fake determinant list, true determinants are made + after the iterations are finished. """ def __init__(self, group): - """ - Contructer of the iterative object - """ + """Initialize object with group. - #print "creating 'iterative object' for %s" % (group.label) - - self.label = group.label - self.atom = group.atom - self.res_name = group.residue_type - self.Q = group.charge - self.pKa_old = None - self.pKa_new = None - self.pKa_iter = [] - self.pKa_NonIterative = 0.00 - self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} + Args: + group: group to use for initialization. + """ + self.label = group.label + self.atom = group.atom + self.res_name = group.residue_type + self.q = group.charge + self.pka_old = None + self.pka_new = None + self.pka_iter = [] + self.pka_noniterative = 0.00 + self.determinants = {'sidechain': [], 'backbone': [], 'coulomb': []} self.group = group self.converged = True - # Calculate the Non-Iterative part of pKa from the group object # Side chain side_chain = 0.00 for determinant in group.determinants['sidechain']: value = determinant.value side_chain += value - # Back bone - back_bone = 0.00 + back_bone = 0.00 for determinant in group.determinants['backbone']: value = determinant.value - back_bone += value - + back_bone += value # Coulomb - coulomb = 0.00 + coulomb = 0.00 for determinant in group.determinants['coulomb']: value = determinant.value - coulomb += value - - self.pKa_NonIterative = group.model_pka - self.pKa_NonIterative += group.energy_volume - self.pKa_NonIterative += group.energy_local - self.pKa_NonIterative += side_chain - self.pKa_NonIterative += back_bone - self.pKa_NonIterative += coulomb - - self.pKa_old = self.pKa_NonIterative - + coulomb += value + self.pka_noniterative = group.model_pka + self.pka_noniterative += group.energy_volume + self.pka_noniterative += group.energy_local + self.pka_noniterative += side_chain + self.pka_noniterative += back_bone + self.pka_noniterative += coulomb + self.pka_old = self.pka_noniterative def __eq__(self, other): - """ - Check if two groups should be considered identical - """ + """Needed to use objects in sets.""" if self.atom.type == 'atom': # In case of protein atoms we trust the labels - return self.label==other.label + return self.label == other.label else: # For heterogene atoms we also need to check the residue number - return self.label==other.label and self.atom.res_num == other.atom.res_num + return self.label == other.label \ + and self.atom.res_num == other.atom.res_num def __hash__(self): - """ Needed together with __eq__ - otherwise we can't make sets of groups """ + """Needed to use objects in sets.""" return id(self) From 59a8717664a0bf29e276a0bf604681b3d83743ea Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 15:30:35 -0700 Subject: [PATCH 17/65] De-lint lib.py. --- propka/atom.py | 10 +- propka/lib.py | 291 +++++++++++++++++++++++++++++-------------------- 2 files changed, 180 insertions(+), 121 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index 78a6a50..37b6da6 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -240,7 +240,7 @@ class Atom(object): if self.group.titratable: model_pka = '%6.2f'%self.group.model_pka str_ = "%-6s%5d %s " % (self.type.upper(), self.numb, - propka.lib.makeTidyAtomLabel(self.name, self.element)) + propka.lib.make_tidy_atom_label(self.name, self.element)) str_ += "%s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % (self.res_name, self.chain_id, self.res_num, self.x, self.y, self.z, group, model_pka) @@ -313,7 +313,7 @@ class Atom(object): String with PDB line. """ str_ = "%-6s%5d " % (self.type.upper(), self.numb) - str_ += "%s %s" % (propka.lib.makeTidyAtomLabel(self.name, self.element), + str_ += "%s %s" % (propka.lib.make_tidy_atom_label(self.name, self.element), self.res_name) str_ += "%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s\n" % (self.chain_id, self.res_num, self.x, self.y, self.z, @@ -330,7 +330,7 @@ class Atom(object): Returns: String with MOL2 line. """ - str_ = "%-4d %-4s " % (id_, propka.lib.makeTidyAtomLabel(self.name, + str_ = "%-4d %-4s " % (id_, propka.lib.make_tidy_atom_label(self.name, self.element)) str_ += "%10.4f %10.4f %10.4f " % (self.x, self.y, self.z) str_ += "%6s %6d %10s %10.4f\n" % (self.sybyl_type.replace('-', ''), @@ -369,7 +369,7 @@ class Atom(object): beta = self.beta str_ = "ATOM " str_ += "%6d" % (numb) - str_ += " %s" % (propka.lib.makeTidyAtomLabel(name, self.element)) + str_ += " %s" % (propka.lib.make_tidy_atom_label(name, self.element)) str_ += " %s" % (res_name) str_ += "%2s" % (chain_id) str_ += "%4d" % (res_num) @@ -388,7 +388,7 @@ class Atom(object): Returns: String with label""" - return propka.lib.makeTidyAtomLabel(self.name, self.element) + return propka.lib.make_tidy_atom_label(self.name, self.element) def __str__(self): """Return an undefined-format string version of this atom.""" diff --git a/propka/lib.py b/propka/lib.py index cd25446..5ca270e 100644 --- a/propka/lib.py +++ b/propka/lib.py @@ -1,112 +1,142 @@ -from __future__ import division -from __future__ import print_function - +"""Implements many of the main functions used to call PROPKA.""" import sys -import pkg_resources import logging import argparse +import pkg_resources -logger = logging.getLogger("propka") -stdout_handler = logging.StreamHandler(sys.stdout) -stdout_handler.setFormatter(logging.Formatter("%(message)s")) -logger.addHandler(stdout_handler) +_LOGGER = logging.getLogger("propka") +_STDOUT_HANDLER = logging.StreamHandler(sys.stdout) +_STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s")) +_LOGGER.addHandler(_STDOUT_HANDLER) -# -# file I/O -# -def open_file_for_reading(filename): - """Open file or file-like stream *filename* for reading. - *filename* may be a string and then it is opened but if it is a - file-like object (such as an open :class:`file` or - :class:`StringIO.StringIO` --- really anything with ``next()``, - ``read()``, ``readlines()``, ``readline``, ``close`` methods) then - the object is just passed through (the stream is attempted to be - reset to the beginning with ``fseek(0)``). +def open_file_for_reading(input_file): + """Open file or file-like stream for reading. + + TODO - convert this to a context manager + + Args: + input_file: path to file or file-like object. If file-like object, + then will attempt fseek(0). """ - if (hasattr(filename, 'next') or hasattr(filename, '__next__')) \ - and hasattr(filename, 'read') \ - and hasattr(filename, 'readline') and hasattr(filename, 'readlines') \ - and hasattr(filename, 'close'): - # already a stream - try: - filename.fseek(0) - except AttributeError: - pass - return filename + try: + input_file.fseek(0) + return input_file + except AttributeError: + pass try: - f = open(filename,'r') + file_ = open(input_file, 'rt') except: - raise IOError('Cannot find file %s' %filename) - return f + raise IOError('Cannot find file %s' % input_file) + return file_ -def open_file_for_writing(filename): - """Open file or file-like stream for writing""" - if hasattr(filename, 'write') and hasattr(filename, 'writeline') and hasattr(filename, 'writelines') \ - and hasattr(filename, 'close'): - # already a stream - try: - mode = filename.mode - except AttributeError: - mode = "w" + +def open_file_for_writing(input_file): + """Open file or file-like stream for writing. + + TODO - convert this to a context manager. + + Args: + input_file: path to file or file-like object. If file-like object, + then will attempt to get file mode. + """ + try: + mode = input_file.mode if not ("w" in mode or "a" in mode or "+" in mode): raise IOError("File/stream not open for writing") - return filename - + return input_file + except AttributeError: + pass try: - f = open(filename,'w') - except: - raise Exception('Could not open %s'%filename) - return f + file_ = open(input_file, 'wt') + except FileNotFoundError: + raise Exception('Could not open %s' % input_file) + return file_ + -# -# bookkeeping etc. -# def conformation_sorter(conf): + """TODO - figure out what this function does.""" model = int(conf[:-1]) altloc = conf[-1:] return model*100+ord(altloc) + def split_atoms_into_molecules(atoms): + """Maps atoms into molecules. + + Args: + atoms: list of atoms + Returns: + list of molecules + """ molecules = [] - - while len(atoms)>0: + while len(atoms) > 0: initial_atom = atoms.pop() - molecules.append( make_molecule(initial_atom,atoms)) - + molecules.append(make_molecule(initial_atom, atoms)) return molecules + def make_molecule(atom, atoms): + """Make a molecule from atoms. + + Args: + atom: one of the atoms + atoms: a list of the remaining atoms + Return: + list of atoms + """ bonded_atoms = [a for a in atoms if atom in a.bonded_atoms] res_atoms = [atom,] - - for ba in bonded_atoms: - if ba in atoms: - atoms.remove(ba) - res_atoms.extend(make_molecule(ba, atoms)) - + for bond_atom in bonded_atoms: + if bond_atom in atoms: + atoms.remove(bond_atom) + res_atoms.extend(make_molecule(bond_atom, atoms)) return res_atoms -def make_grid(min,max,step): - x = min - while x <= max: +def make_grid(min_, max_, step): + """Make a grid across the specified tange. + + TODO - figure out if this duplicates existing generators like `range` or + numpy function. + + Args: + min_: minimum value of grid + max_: maximum value of grid + step: grid step size + """ + x = min_ + while x <= max_: yield x x += step - return + def generate_combinations(interactions): + """Generate combinations of interactions. + + Args: + interactions: list of interactions + Returns: + list of combinations + """ res = [[]] for interaction in interactions: res = make_combination(res, interaction) res.remove([]) - return res def make_combination(combis, interaction): + """Make a specific set of combinations. + + Args: + combis: list of combinations + interaction: interaction to add to combinations + Returns: + list of combinations + """ res = [] for combi in combis: res.append(combi+[interaction]) @@ -115,10 +145,14 @@ def make_combination(combis, interaction): def parse_res_string(res_str): - """ - Parse the residue string, in format "chain:resnum[inscode]", and return - a tuple of (chain, resnum, inscode). Raises ValueError if the input - string is invalid. + """Parse a residue string. + + Args: + res_string: residue string in format "chain:resnum[inscode]" + Returns: + a tuple of (chain, resnum, inscode). + Raises: + ValueError if the input string is invalid. """ try: chain, resnum_str = res_str.split(":") @@ -142,8 +176,8 @@ def build_parser(parser=None): """Build an argument parser for PROPKA. Args: - parser_: existing parser. If this is not None, then the PROPKA parser will - be created as a subparser to this existing parser. Otherwise, a + parser: existing parser. If this is not None, then the PROPKA parser will + be created as a subparser to this existing parser. Otherwise, a new parser will be created. Returns: ArgumentParser object. @@ -158,7 +192,6 @@ def build_parser(parser=None): # This is duck-typing at its finest group = parser group.add_argument("input_pdb", help="read data from ") - group.add_argument("-f", "--file", action="append", dest="filenames", default=[], help="read data from , i.e. is added to arguments") group.add_argument("-r", "--reference", dest="reference", default="neutral", @@ -223,31 +256,25 @@ def build_parser(parser=None): def loadOptions(args): - """ - Load the arguments parser with options. Note that verbosity is set as soon - as this function is invoked. + """Load the arguments parser with options. + + NOTE - verbosity is set as soon as this function is invoked. Arguments: args: list of arguments Returns: argparse namespace """ - # defining a 'usage' message - usage = "usage: %prog [options] filename" - # loading the parser parser = build_parser() - # parsing and returning options and arguments if len(args) == 0: # command line options = parser.parse_args() else: options = parser.parse_args(args) - # adding specified filenames to arguments options.filenames.append(options.input_pdb) - # Convert titrate_only string to a list of (chain, resnum) items: if options.titrate_only is not None: res_list = [] @@ -255,76 +282,108 @@ def loadOptions(args): try: chain, resnum, inscode = parse_res_string(res_str) except ValueError: - logger.critical('Invalid residue string: "%s"' % res_str) + _LOGGER.critical('Invalid residue string: "%s"', res_str) sys.exit(1) res_list.append((chain, resnum, inscode)) options.titrate_only = res_list - - # Set the no-print variable level = getattr(logging, options.log_level) - logger.setLevel(level) - + _LOGGER.setLevel(level) # done! return options -def makeTidyAtomLabel(name,element): - """ - Returns a 'tidier' atom label for printing the new pdbfile - """ +def make_tidy_atom_label(name, element): + """Returns a 'tidier' atom label for printing to the new PDB file. - if len(name)>4:# if longer than 4, just truncate the name - label=name[0:4] - elif len(name)==4:# if lenght is 4, otherwise use the name as it is + Args: + name: atom name + element: atom element + Returns: + string + """ + if len(name) > 4: # if longer than 4, just truncate the name + label = name[0:4] + elif len(name) == 4: # if length is 4, otherwise use the name as it is label = name else: # if less than 4 characters long, insert white space as needed - if len(element)==1: - label = ' %-3s'%name - else: # The element shoul occupy the two first chars - label = '%-4s'%name - + if len(element) == 1: + label = ' %-3s' % name + else: # The element should occupy the two first chars + label = '%-4s' % name return label - def get_sorted_configurations(configuration_keys): - """ - extract and sort configurations + """Extract and sort configurations. + + Args: + configuration_keys: list of configuration keys + Returns: + list of configurations """ configurations = list(configuration_keys) configurations.sort(key=configuration_compare) return configurations + def configuration_compare(conf): + """TODO - figure out what this function does.""" return 100*int(conf[1:-2]) + ord(conf[-1]) +def write_file(filename, lines): + """Writes a new file. - -def writeFile(filename, lines): + Args: + filename: name of file + lines: lines to write to file """ - Writes a new file - """ - f = open_file_for_writing(filename) - + file_ = open_file_for_writing(filename) for line in lines: - f.write( "%s\n" % (line) ) - f.close() - + file_.write("%s\n" % (line)) + file_.close() def _args_to_str(arg_list): + """Summarize list of arguments in string. + + Args: + arg_list: list of arguments + Returns: + string + """ return " ".join(map(str, arg_list)) + def info(*args): - """Log a message. Level defaults to INFO unless overridden.""" - logger.info(_args_to_str(args)) + """Log a message to info. + + Level defaults to INFO unless overridden. + + Args: + args: argument list + """ + _LOGGER.info(_args_to_str(args)) + def debug(*args): - """Log a message on the DEBUG level.""" - logger.debug(_args_to_str(args)) + """Log a message to debug. + + Level defaults to DEBUG unless overridden. + + Args: + args: argument list + """ + _LOGGER.debug(_args_to_str(args)) + def warning(*args): - """Log a WARN message""" - logger.warning(_args_to_str(args)) + """Log a message to warning. + + Level defaults to WARNING unless overridden. + + Args: + args: argument list + """ + _LOGGER.warning(_args_to_str(args)) From 3a5bae5b41a9d6eb9ca18ca6c4aa13bc38998464 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 16:39:59 -0700 Subject: [PATCH 18/65] De-lint ligand_pka_values.py --- propka/group.py | 8 +- propka/ligand_pka_values.py | 207 ++++++++++++++++++++++++------------ 2 files changed, 142 insertions(+), 73 deletions(-) diff --git a/propka/group.py b/propka/group.py index 49e2552..fa746bf 100644 --- a/propka/group.py +++ b/propka/group.py @@ -2,7 +2,7 @@ import math import propka.ligand import propka.protonate -from propka.ligand_pka_values import ligand_pka_values +from propka.ligand_pka_values import LigandPkaValues from propka.determinant import Determinant from propka.lib import info, warning @@ -1368,10 +1368,10 @@ def is_ligand_group_by_marvin_pkas(parameters, atom): # if not already done # TODO - double-check testing coverage of these functions. if not atom.conformation_container.marvin_pkas_calculated: - lpka = ligand_pka_values(parameters) + lpka = LigandPkaValues(parameters) lpka.get_marvin_pkas_for_molecular_container(atom.molecular_container, - min_pH=parameters.min_ligand_model_pka, - max_pH=parameters.max_ligand_model_pka) + min_ph=parameters.min_ligand_model_pka, + max_ph=parameters.max_ligand_model_pka) if atom.marvin_pka: return TitratableLigandGroup(atom) # Special case of oxygen in carboxyl group not assigned a pka value by marvin diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index 512b7cd..7831cb2 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -1,15 +1,25 @@ -#!/usr/bin/env python - -from __future__ import division -from __future__ import print_function +"""Ligand pKa values""" +import os +import subprocess +import sys +import propka.molecular_container +import propka.calculations +import propka.parameters +import propka.pdb +import propka.lib from propka.lib import info, warning -import propka.molecular_container, propka.calculations, propka.calculations, propka.parameters, propka.pdb, propka.lib, os, subprocess, sys -class ligand_pka_values: +class LigandPkaValues: + """Ligand pKa value class.""" + def __init__(self, parameters): - self.parameters = parameters + """Initialize object with parameters. + Args: + parameters: parameters + """ + self.parameters = parameters # attempt to find Marvin executables in the path self.molconvert = self.find_in_path('molconvert') self.cxcalc = self.find_in_path('cxcalc') @@ -17,69 +27,130 @@ class ligand_pka_values: info(self.cxcalc) info(self.molconvert) - return + @staticmethod + def find_in_path(program): + """Find a program in the system path. - - def find_in_path(self, program): + Args: + program: program to find + Returns: + location of program + """ path = os.environ.get('PATH').split(os.pathsep) - - l = [i for i in filter(lambda loc: os.access(loc, os.F_OK), - map(lambda dir: os.path.join(dir, program),path))] - - if len(l) == 0: - info('Error: Could not find %s. Please make sure that it is found in the path.' % program) + locs = [i for i in filter(lambda loc: os.access(loc, os.F_OK), \ + map(lambda dir: os.path.join(dir, program), path))] + if len(locs) == 0: + str_ = "'Error: Could not find %s." % program + str_ += ' Please make sure that it is found in the path.' + info(str_) sys.exit(-1) + return locs[0] - return l[0] + def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, max_ph=20): + """Use Marvin executables to get pKas for a PDB file. - def get_marvin_pkas_for_pdb_file(self, file, no_pkas=10, min_pH =-10, max_pH=20): - molecule = propka.molecular_container.Molecular_container(file) - self.get_marvin_pkas_for_molecular_container(molecule, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) - return + Args: + pdbfile: PDB file + num_pkas: number of pKas to get + min_ph: minimum pH value + max_ph: maximum pH value + """ + molecule = propka.molecular_container.Molecular_container(pdbfile) + self.get_marvin_pkas_for_molecular_container(molecule, + num_pkas=num_pkas, + min_ph=min_ph, + max_ph=max_ph) - def get_marvin_pkas_for_molecular_container(self, molecule, no_pkas=10, min_pH =-10, max_pH=20): + def get_marvin_pkas_for_molecular_container(self, molecule, num_pkas=10, min_ph=-10, max_ph=20): + """Use Marvin executables to calculate pKas for a molecular container. + + Args: + molecule: molecular container + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ for name in molecule.conformation_names: - filename = '%s_%s'%(molecule.name,name) - self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], name=filename, reuse=molecule.options.reuse_ligand_mol2_file, - no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + filename = '%s_%s' % (molecule.name, name) + self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], + name=filename, + reuse=molecule.\ + options.reuse_ligand_mol2_file, + num_pkas=num_pkas, + min_ph=min_ph, + max_ph=max_ph) - return + def get_marvin_pkas_for_conformation_container(self, conformation, + name='temp', reuse=False, + num_pkas=10, min_ph=-10, + max_ph=20): + """Use Marvin executables to calculate pKas for a conformation container. - def get_marvin_pkas_for_conformation_container(self, conformation, name = 'temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + conformation: conformation container + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ conformation.marvin_pkas_calculated = True - self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse, - no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), + name=name, reuse=reuse, + num_pkas=num_pkas, min_ph=min_ph, + max_ph=max_ph) - return + def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, + num_pkas=10, min_ph=-10, max_ph=20): + """Use Marvin executables to calculate pKas for a list of atoms. - def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + atoms: list of atoms + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ # do one molecule at the time so we don't confuse marvin molecules = propka.lib.split_atoms_into_molecules(atoms) - for i in range(len(molecules)): + for i, molecule in enumerate(molecules): filename = '%s_%d.mol2'%(name, i+1) - self.get_marvin_pkas_for_molecule(molecules[i], filename=filename, reuse=reuse, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + self.get_marvin_pkas_for_molecule(molecule, filename=filename, + reuse=reuse, num_pkas=num_pkas, + min_ph=min_ph, max_ph=max_ph) - return + def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', + reuse=False, num_pkas=10, min_ph=-10, + max_ph=20): + """Use Marvin executables to calculate pKas for a molecule. - - def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + molecule: the molecule + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ # print out structure unless we are using user-modified structure if not reuse: propka.pdb.write_mol2_for_atoms(atoms, filename) # check that we actually have a file to work with if not os.path.isfile(filename): - warning('Didn\'t find a user-modified file \'%s\' - generating one' % filename) + errstr = "Didn't find a user-modified file '%s' - generating one" \ + % filename + warning(errstr) propka.pdb.write_mol2_for_atoms(atoms, filename) - - - - # Marvin - # calculate pKa values - options = 'pka -a %d -b %d --min %f --max %f -d large'%(no_pkas, no_pkas, min_pH, max_pH) - (output,errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), - stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() - - if len(errors)>0: + # Marvin calculate pKa values + options = 'pka -a %d -b %d --min %f --max %f -d large' % (num_pkas, + num_pkas, + min_ph, + max_ph) + (output, errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE).communicate() + if len(errors) > 0: info('********************************************************************************************************') info('* Warning: Marvin execution failed: *') info('* %-100s *' % errors) @@ -87,36 +158,34 @@ class ligand_pka_values: info('* Please edit the ligand mol2 file and re-run PropKa with the -l option: %29s *' % filename) info('********************************************************************************************************') sys.exit(-1) - # extract calculated pkas - indices,pkas,types = self.extract_pkas(output) - + indices, pkas, types = self.extract_pkas(output) # store calculated pka values - for i in range(len(indices)): - atoms[indices[i]].marvin_pka = pkas[i] - atoms[indices[i]].charge = {'a':-1,'b':+1}[types[i]] - info('%s model pKa: %.2f' % (atoms[indices[i]], pkas[i])) + for i, index in enumerate(indices): + atoms[index].marvin_pka = pkas[i] + atoms[index].charge = {'a': -1, 'b': 1}[types[i]] + info('%s model pKa: %.2f' % (atoms[index], pkas[i])) - return + @staticmethod + def extract_pkas(output): + """Extract pKa value from output. - def extract_pkas(self, output): + Args: + output: output string to parse + Returns: + 1. Indices + 2. Values + 3. Types + """ # split output - [tags, values,empty_line] = output.decode().split('\n') - #info(tags) - #info(values) + [tags, values, _] = output.decode().split('\n') tags = tags.split('\t') values = values.split('\t') - # format values - types = [tags[i][0] for i in range(1,len(tags)-1) if len(values)>i and values[i] != ''] - indices = [int(a)-1 for a in values[-1].split(',') if a !=''] - values = [float(v.replace(',','.')) for v in values[1:-1] if v != ''] - + types = [tags[i][0] for i in range(1, len(tags)-1) if len(values) > i \ + and values[i] != ''] + indices = [int(a)-1 for a in values[-1].split(',') if a != ''] + values = [float(v.replace(',', '.')) for v in values[1:-1] if v != ''] if len(indices) != len(values) != len(types): raise Exception('Lengths of atoms and pka values mismatch') - return indices, values, types - - - - From 786a4c92927224b92b55bb018d87983fff6669b8 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 18:29:38 -0700 Subject: [PATCH 19/65] De-lint ligand.py. --- propka/ligand.py | 431 +++++++++++++++++++---------------------------- 1 file changed, 176 insertions(+), 255 deletions(-) diff --git a/propka/ligand.py b/propka/ligand.py index ec571f7..e97efc6 100644 --- a/propka/ligand.py +++ b/propka/ligand.py @@ -1,16 +1,9 @@ -#!/usr/bin/python - -from __future__ import division -from __future__ import print_function - -import sys - -import propka.calculations -from propka.vector_algebra import * -from propka.lib import info, warning +"""Ligand classes and functions.""" +from propka.calculations import squared_distance +from propka.vector_algebra import vector -all_sybyl_types = [ +ALL_SYBYL_TYPES = [ 'C.3', # carbon sp3 'H', # hydrogen 'C.2', # carbon sp2 @@ -66,344 +59,272 @@ all_sybyl_types = [ 'Sn'] # tin -#propka_input_types = ['1P','1N','2P','2N'] -#for type in all_sybyl_types: -# temp = type.replace('.','') -# if len(temp)>3: -# temp = temp[0:3] -# propka_input_types.append(temp) -# -#for t in propka_input_types: -# print (t) - - -propka_input_types = [ - '1P', - '1N', - '2P', - '2N', - 'C3', - 'H', - 'C2', - 'Hsp', - 'C1', - 'Ht3', - 'Car', - 'LP', - 'Cca', - 'Du', - 'N3', - 'DuC', - 'N2', - 'Any', - 'N1', - 'Hal', - 'Nar', - 'Het', - 'Nam', - 'Hev', - 'Npl', - 'Li', - 'N4', - 'Na', - 'O3', - 'Mg', - 'O2', - 'Al', - 'Oco', - 'Si', - 'Osp', - 'K', - 'Ot3', - 'Ca', - 'S3', - 'Crt', - 'S2', - 'Cro', - 'SO', - 'Mn', - 'SO2', - 'Fe', - 'P3', - 'Coo', - 'F', - 'Cu', - 'Cl', - 'Zn', - 'Br', - 'Se', - 'I', - 'Mo', - 'Sn'] - - -max_C_double_bond = 1.3 -max_C_triple_bond = 1.2 - -max_C_double_bond_squared = max_C_double_bond*max_C_double_bond -max_C_triple_bond_squared = max_C_triple_bond*max_C_triple_bond - - +PROPKA_INPUT_TYPES = ['1P', '1N', '2P', '2N', 'C3', 'H', 'C2', 'Hsp', 'C1', + 'Ht3', 'Car', 'LP', 'Cca', 'Du', 'N3', 'DuC', 'N2', + 'Any', 'N1', 'Hal', 'Nar', 'Het', 'Nam', 'Hev', 'Npl', + 'Li', 'N4', 'Na', 'O3', 'Mg', 'O2', 'Al', 'Oco', 'Si', + 'Osp', 'K', 'Ot3', 'Ca', 'S3', 'Crt', 'S2', 'Cro', 'SO', + 'Mn', 'SO2', 'Fe', 'P3', 'Coo', 'F', 'Cu', 'Cl', 'Zn', + 'Br', 'Se', 'I', 'Mo', 'Sn'] +MAX_C_DOUBLE_BOND = 1.3 +MAX_C_TRIPLE_BOND = 1.2 +MAX_C_DOUBLE_BOND_SQUARED = MAX_C_DOUBLE_BOND*MAX_C_DOUBLE_BOND +MAX_C_TRIPLE_BOND_SQUARED = MAX_C_TRIPLE_BOND*MAX_C_TRIPLE_BOND +PLANARITY_MARGIN = 0.20 def assign_sybyl_type(atom): + """Assign Sybyl type to atom. + + Args: + atom: atom to assign + """ # check if we already have assigned a name to this atom if atom.sybyl_assigned: - #info(atom.name,'already assigned') return - # find some properties of the atom ring_atoms = is_ring_member(atom) - aromatic = is_aromatic_ring(ring_atoms) - planar = is_planar(atom) + aromatic = is_aromatic_ring(ring_atoms) + planar = is_planar(atom) bonded_elements = {} - for i in range(len(atom.bonded_atoms)): - bonded_elements[i]=atom.bonded_atoms[i].element - - - + for i, bonded_atom in enumerate(atom.bonded_atoms): + bonded_elements[i] = bonded_atom.element # Aromatic carbon/nitrogen if aromatic: - for ra in ring_atoms: - if ra.element in ['C','N']: - set_type(ra, ra.element+'.ar') + for ring_atom in ring_atoms: + if ring_atom.element in ['C', 'N']: + set_type(ring_atom, ring_atom.element+'.ar') return - # check for amide - if atom.element in ['O','N','C']: - O=None - N=None - C=None + if atom.element in ['O', 'N', 'C']: + o_atom = None + n_atom = None + c_atom = None # oxygen, nitrogen - if atom.element in ['O','N']: - for b in atom.get_bonded_elements('C'): - for bb in b.bonded_atoms: - if (bb.element =='N' and atom.element == 'O'): - O=atom - C=b - N=bb - elif (bb.element =='O' and atom.element == 'N'): - N=atom - C=b - O=bb + if atom.element in ['O', 'N']: + for bonded_elem in atom.get_bonded_elements('C'): + for bonded_atom in bonded_elem.bonded_atoms: + if (bonded_atom.element == 'N' and atom.element == 'O'): + o_atom = atom + c_atom = bonded_elem + n_atom = bonded_atom + elif (bonded_atom.element == 'O' and atom.element == 'N'): + n_atom = atom + c_atom = bonded_elem + o_atom = bonded_atom # carbon if atom.element == 'C': nitrogens = atom.get_bonded_elements('N') oxygens = atom.get_bonded_elements('O') - if len(nitrogens)==1 and len(oxygens)==1: - C = atom - N = nitrogens[0] - O = oxygens[0] - - - if C and N and O: + if len(nitrogens) == 1 and len(oxygens) == 1: + c_atom = atom + n_atom = nitrogens[0] + o_atom = oxygens[0] + if c_atom and n_atom and o_atom: # make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds - if not is_aromatic_ring(is_ring_member(N)) and len(N.get_bonded_heavy_atoms())==2: - set_type(N,'N.am') - set_type(C,'C.2') - set_type(O,'O.2') + if not is_aromatic_ring(is_ring_member(n_atom)) \ + and len(n_atom.get_bonded_heavy_atoms()) == 2: + set_type(n_atom, 'N.am') + set_type(c_atom, 'C.2') + set_type(o_atom, 'O.2') return - - - if atom.element=='C': + if atom.element == 'C': # check for carboxyl - if len(atom.bonded_atoms)==3 and list(bonded_elements.values()).count('O')==2: - i1 = list(bonded_elements.values()).index('O') - i2 = list(bonded_elements.values()).index('O',i1+1) - if len(atom.bonded_atoms[i1].bonded_atoms)==1 and len(atom.bonded_atoms[i2].bonded_atoms)==1: - set_type(atom.bonded_atoms[i1],'O.co2-') - set_type(atom.bonded_atoms[i2],'O.co2') - set_type(atom,'C.2') + if len(atom.bonded_atoms) == 3 and list(bonded_elements.values()).count('O') == 2: + index1 = list(bonded_elements.values()).index('O') + index2 = list(bonded_elements.values()).index('O', index1+1) + if len(atom.bonded_atoms[index1].bonded_atoms) == 1 \ + and len(atom.bonded_atoms[index2].bonded_atoms) == 1: + set_type(atom.bonded_atoms[index1], 'O.co2-') + set_type(atom.bonded_atoms[index2], 'O.co2') + set_type(atom, 'C.2') return - - - # sp carbon - if len(atom.bonded_atoms)<=2: - for b in atom.bonded_atoms: - if propka.calculations.squared_distance(atom, b) < max_C_triple_bond_squared: - set_type(atom,'C.1') - set_type(b,b.element+'.1') + if len(atom.bonded_atoms) <= 2: + for bonded_atom in atom.bonded_atoms: + if squared_distance(atom, bonded_atom) < MAX_C_TRIPLE_BOND_SQUARED: + set_type(atom, 'C.1') + set_type(bonded_atom, bonded_atom.element + '.1') if atom.sybyl_assigned: return - # sp2 carbon if planar: - set_type(atom,'C.2') + set_type(atom, 'C.2') # check for N.pl3 - for b in atom.bonded_atoms: - if b.element=='N': - if len(b.bonded_atoms)<3 or is_planar(b): - set_type(b,'N.pl3') + for bonded_atom in atom.bonded_atoms: + if bonded_atom.element == 'N': + if len(bonded_atom.bonded_atoms) < 3 \ + or is_planar(bonded_atom): + set_type(bonded_atom, 'N.pl3') return - # sp3 carbon set_type(atom, 'C.3') return - # Nitrogen if atom.element == 'N': # check for planar N - if len(atom.bonded_atoms)==1: + if len(atom.bonded_atoms) == 1: if is_planar(atom.bonded_atoms[0]): - set_type(atom,'N.pl3') + set_type(atom, 'N.pl3') return - if planar: - set_type(atom,'N.pl3') + set_type(atom, 'N.pl3') return - - set_type(atom,'N.3') + set_type(atom, 'N.3') return - # Oxygen if atom.element == 'O': - set_type(atom,'O.3') - + set_type(atom, 'O.3') if len(atom.bonded_atoms) == 1: # check for carboxyl if atom.bonded_atoms[0].element == 'C': the_carbon = atom.bonded_atoms[0] - if len(the_carbon.bonded_atoms)==3 and the_carbon.count_bonded_elements('O')==2: - [O1,O2] = the_carbon.get_bonded_elements('O') - - if len(O1.bonded_atoms)==1 and len(O2.bonded_atoms)==1: - set_type(O1,'O.co2-') - set_type(O2,'O.co2') - set_type(the_carbon,'C.2') + if len(the_carbon.bonded_atoms) == 3 \ + and the_carbon.count_bonded_elements('O') == 2: + [oxy1, oxy2] = the_carbon.get_bonded_elements('O') + if len(oxy1.bonded_atoms) == 1 and len(oxy2.bonded_atoms) == 1: + set_type(oxy1, 'O.co2-') + set_type(oxy2, 'O.co2') + set_type(the_carbon, 'C.2') return - # check for X=O - if propka.calculations.squared_distance(atom, atom.bonded_atoms[0]) < max_C_double_bond_squared: - set_type(atom,'O.2') - if atom.bonded_atoms[0].element=='C': - set_type(atom.bonded_atoms[0],'C.2') + if squared_distance(atom, atom.bonded_atoms[0]) < MAX_C_DOUBLE_BOND_SQUARED: + set_type(atom, 'O.2') + if atom.bonded_atoms[0].element == 'C': + set_type(atom.bonded_atoms[0], 'C.2') return - - # Sulphur if atom.element == 'S': # check for SO2 - if list(bonded_elements.values()).count('O')==2: - i1 = list(bonded_elements.values()).index('O') - i2 = list(bonded_elements.values()).index('O',i1+1) - set_type(atom.bonded_atoms[i1],'O.2') - set_type(atom.bonded_atoms[i2],'O.2') - set_type(atom,'S.o2') + if list(bonded_elements.values()).count('O') == 2: + index1 = list(bonded_elements.values()).index('O') + index2 = list(bonded_elements.values()).index('O', index1+1) + set_type(atom.bonded_atoms[index1], 'O.2') + set_type(atom.bonded_atoms[index2], 'O.2') + set_type(atom, 'S.o2') return - # check for SO4 - if list(bonded_elements.values()).count('O')==4: - no_O2 = 0 + if list(bonded_elements.values()).count('O') == 4: + no_o2 = 0 for i in range(len(atom.bonded_atoms)): - if len(atom.bonded_atoms[i].bonded_atoms)==1 and no_O2<2: - set_type(atom.bonded_atoms[i],'O.2') - no_O2+=1 + if len(atom.bonded_atoms[i].bonded_atoms) == 1 and no_o2 < 2: + set_type(atom.bonded_atoms[i], 'O.2') + no_o2 += 1 else: - set_type(atom.bonded_atoms[i],'O.3') - - set_type(atom,'S.3') - - + set_type(atom.bonded_atoms[i], 'O.3') + set_type(atom, 'S.3') return - - # Phosphorus if atom.element == 'P': - set_type(atom,'P.3') - + set_type(atom, 'P.3') # check for phosphate group bonded_oxygens = atom.get_bonded_elements('O') - for o in bonded_oxygens: set_type(o,'O.3') -# if len(bonded_oxygens)>=3: -# # find oxygens only bonded to current phosphorus -# bonded_oxygens_1 = [o for o in bonded_oxygens if len(o.get_bonded_heavy_atoms())==1] -# # find the closest oxygen ... -# closest_oxygen = min(bonded_oxygens_1, -# key= lambda o:propka.calculations.squared_distance(atom,o)) -# # ... and set it to O.2 -# set_type(closest_oxygen,'O.2') - + for o_atom in bonded_oxygens: + set_type(o_atom, 'O.3') return - - - element = atom.element.capitalize() - set_type(atom,element) - # info('Using element as type for %s'%atom.element) - - return + set_type(atom, element) def is_ring_member(atom): - return identify_ring(atom,atom,0,[]) + """Determine if atom is a member of a ring. + + Args: + atom: atom to test + Returns: + list of atoms + """ + return identify_ring(atom, atom, 0, []) + def identify_ring(this_atom, original_atom, number, past_atoms): - number+=1 - past_atoms=past_atoms+[this_atom] + """Identify the atoms in a ring + + Args: + this_atom: atom to test + original_atom: some other atom + number: number of atoms + past_atoms: atoms that have already been found + Returns: + list of atoms + """ + number += 1 + past_atoms = past_atoms + [this_atom] return_atoms = [] if number > 10: return return_atoms - for atom in this_atom.get_bonded_heavy_atoms(): - if atom == original_atom and number>2: + if atom == original_atom and number > 2: return past_atoms - if atom not in past_atoms: - these_return_atoms = identify_ring(atom, original_atom, number, past_atoms) + these_return_atoms = identify_ring(atom, original_atom, number, + past_atoms) if len(these_return_atoms) > 0: - if len(return_atoms)>len(these_return_atoms) or len(return_atoms)==0: + if len(return_atoms) > len(these_return_atoms) \ + or len(return_atoms) == 0: return_atoms = these_return_atoms - return return_atoms +def set_type(atom, type_): + """Set atom type.. - -def set_type(atom,type): - #info(atom, '->',type) - atom.sybyl_type = type - atom.sybyl_assigned=True - return - - + Args: + atom: atom to set + type_: type value to set + """ + atom.sybyl_type = type_ + atom.sybyl_assigned = True def is_planar(atom): - """ Finds out if atom forms a plane together with its bonded atoms""" - atoms = [atom]+atom.bonded_atoms + """Finds out if atom forms a plane together with its bonded atoms. + + Args: + atom: atom to test + Returns: + Boolean + """ + atoms = [atom] + atom.bonded_atoms return are_atoms_planar(atoms) + def are_atoms_planar(atoms): - if len(atoms)==0: - return False - if len(atoms)<4: - return False - v1 = vector(atom1=atoms[0], atom2=atoms[1]) - v2 = vector(atom1=atoms[0], atom2=atoms[2]) - n = (v1**v2).rescale(1.0) + """Test whether a group of atoms are planar. - margin = 0.20 - for b in atoms[3:]: - v = vector(atom1=atoms[0], atom2=b).rescale(1.0) - #info(atoms[0],abs(v*n) ) - if abs(v*n)>margin: + Args: + atoms: list of atoms + Returns: + Boolean + """ + if len(atoms) == 0: + return False + if len(atoms) < 4: + return False + vec1 = vector(atom1=atoms[0], atom2=atoms[1]) + vec2 = vector(atom1=atoms[0], atom2=atoms[2]) + norm = (vec1**vec2).rescale(1.0) + margin = PLANARITY_MARGIN + for atom in atoms[3:]: + vec = vector(atom1=atoms[0], atom2=atom).rescale(1.0) + if abs(vec*norm) > margin: return False - return True -def is_aromatic_ring(atoms): - if len(atoms)<5: - return False +def is_aromatic_ring(atoms): + """Determine whether group of atoms form aromatic ring. + + Args: + atoms: list of atoms to test + Returns: + Boolean + """ + if len(atoms) < 5: + return False for i in range(len(atoms)): if not are_atoms_planar(atoms[i:]+atoms[:i]): return False - return True - - - - From d8ed3262b9cb33de5be71cbabb2bb776d1e2d32a Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 18:32:47 -0700 Subject: [PATCH 20/65] Ignore Visual Studio Code files. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 398bcbe..72d7fb3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ *.py[cod] # PIP stuff *.egg-info +# Visual studio code +.vscode \ No newline at end of file From 6190f6d5f5fdc27e3d19b9eb20e793e43845ec7e Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sun, 24 May 2020 19:47:10 -0700 Subject: [PATCH 21/65] De-lint molecular_container.py. --- propka/molecular_container.py | 270 ++++++++++++++++++---------------- propka/output.py | 16 +- 2 files changed, 155 insertions(+), 131 deletions(-) diff --git a/propka/molecular_container.py b/propka/molecular_container.py index 74bf566..092989d 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -1,145 +1,140 @@ -#!/usr/bin/python -# -# Molecular container for storing all contents of pdb files -# -# -from __future__ import division -from __future__ import print_function - -import os, sys - -import propka.pdb, propka.version, propka.output, propka.conformation_container, propka.group, propka.lib +"""Molecular container for storing all contents of PDB files.""" +import os +import sys +import propka.pdb +import propka.version +import propka.output +import propka.group +import propka.lib +from propka.conformation_container import ConformationContainer from propka.lib import info, warning + +# TODO - these are constants whose origins are a little murky +UNK_PI_CUTOFF = 0.01 +# Maximum number of iterations for finding PI +MAX_ITERATION = 4 + + class Molecular_container: + """Container for storing molecular contents of PDB files.""" + def __init__(self, input_file, options=None): + """Initialize molecular container. + + Args: + input_file: molecular input file + options: options object + """ # printing out header before parsing input propka.output.printHeader() - # set up some values self.options = options self.input_file = input_file + # TODO - replace this indelicate os.path code with pathlib self.dir = os.path.split(input_file)[0] self.file = os.path.split(input_file)[1] self.name = self.file[0:self.file.rfind('.')] input_file_extension = input_file[input_file.rfind('.'):] - # set the version if options: parameters = propka.parameters.Parameters(self.options.parameters) else: parameters = propka.parameters.Parameters('propka.cfg') try: - exec('self.version = propka.version.%s(parameters)'%parameters.version) + version_class = getattr(propka.version, parameters.version) + self.version = version_class(parameters) except: - raise Exception('Error: Version %s does not exist'%parameters.version) - + errstr = 'Error: Version %s does not exist' % parameters.version + raise Exception(errstr) # read the input file if input_file_extension[0:4] == '.pdb': - # input is a pdb file - # read in atoms and top up containers to make sure that all atoms are present in all conformations - [self.conformations, self.conformation_names] = propka.pdb.read_pdb(input_file, self.version.parameters,self) - if len(self.conformations)==0: + # input is a pdb file. read in atoms and top up containers to make + # sure that all atoms are present in all conformations + [self.conformations, self.conformation_names] \ + = propka.pdb.read_pdb(input_file, self.version.parameters, self) + if len(self.conformations) == 0: info('Error: The pdb file does not seems to contain any molecular conformations') sys.exit(-1) - self.top_up_conformations() - # make a structure precheck - propka.pdb.protein_precheck(self.conformations, self.conformation_names) - + propka.pdb.protein_precheck(self.conformations, + self.conformation_names) # set up atom bonding and protonation self.version.setup_bonding_and_protonation(self) - # Extract groups self.extract_groups() - # sort atoms for name in self.conformation_names: self.conformations[name].sort_atoms() - # find coupled groups self.find_covalently_coupled_groups() - # write out the input file - filename = self.file.replace(input_file_extension,'.propka_input') + filename = self.file.replace(input_file_extension, '.propka_input') propka.pdb.write_input(self, filename) - - elif input_file_extension == '.propka_input': #input is a propka_input file - [self.conformations, self.conformation_names] = propka.pdb.read_input(input_file, self.version.parameters, self) - - # Extract groups - this merely sets up the groups found in the input file + [self.conformations, self.conformation_names] \ + = propka.pdb.read_input(input_file, self.version.parameters, + self) + # Extract groups - this merely sets up the groups found in the + # input file self.extract_groups() - # do some additional set up self.additional_setup_when_reading_input_file() - else: info('Unrecognized input file:%s' % input_file) sys.exit(-1) - - return def top_up_conformations(self): - """ Makes sure that all atoms are present in all conformations """ + """Makes sure that all atoms are present in all conformations.""" for name in self.conformation_names: - if name!='1A' and (len(self.conformations[name]) < len(self.conformations['1A'])): + if name != '1A' and (len(self.conformations[name]) \ + < len(self.conformations['1A'])): self.conformations[name].top_up(self.conformations['1A']) - return - def find_covalently_coupled_groups(self): + """Find covalently coupled groups.""" info('-' * 103) for name in self.conformation_names: self.conformations[name].find_covalently_coupled_groups() - return - def find_non_covalently_coupled_groups(self): + """Find non-covalently coupled groups.""" info('-' * 103) + verbose = self.options.display_coupled_residues for name in self.conformation_names: - self.conformations[name].find_non_covalently_coupled_groups(verbose=self.options.display_coupled_residues) - - return + self.conformations[name].find_non_covalently_coupled_groups(verbose=verbose) def extract_groups(self): - """ Identify the groups needed for pKa calculation """ + """Identify the groups needed for pKa calculation.""" for name in self.conformation_names: self.conformations[name].extract_groups() - return - def additional_setup_when_reading_input_file(self): + """Additional setup.""" for name in self.conformation_names: self.conformations[name].additional_setup_when_reading_input_file() - return - - def calculate_pka(self): + """Calculate pKa values.""" # calculate for each conformation for name in self.conformation_names: self.conformations[name].calculate_pka(self.version, self.options) - # find non-covalently coupled groups self.find_non_covalently_coupled_groups() - # find the average of the conformations self.average_of_conformations() - # print out the conformation-average results propka.output.printResult(self, 'AVR', self.version.parameters) - return - def average_of_conformations(self): + """Generate an average of conformations.""" + parameters = self.conformations[self.conformation_names[0]].parameters # make a new configuration to hold the average values - avr_conformation = propka.conformation_container.ConformationContainer(name='average', - parameters=self.conformations[self.conformation_names[0]].parameters, - molecular_container=self) - + avr_conformation = ConformationContainer(name='average', + parameters=parameters, + molecular_container=self) container = self.conformations[self.conformation_names[0]] for group in container.get_groups_for_calculations(): # new group to hold average values @@ -150,104 +145,133 @@ class Molecular_container: if group_to_add: avr_group += group_to_add else: - warning('Group %s could not be found in conformation %s.' % (group.atom.residue_label, name)) + str_ = 'Group %s could not be found in conformation %s.' \ + % (group.atom.residue_label, name) + warning(str_) # ... and store the average value avr_group = avr_group / len(self.conformation_names) avr_conformation.groups.append(avr_group) - # store information on coupling in the average container - if len(list(filter(lambda c: c.non_covalently_coupled_groups, self.conformations.values()))): + if len(list(filter(lambda c: c.non_covalently_coupled_groups, + self.conformations.values()))): avr_conformation.non_covalently_coupled_groups = True - # store chain info avr_conformation.chains = self.conformations[self.conformation_names[0]].chains - self.conformations['AVR'] = avr_conformation - return - def write_pka(self, filename=None, reference="neutral", direction="folding", options=None): - #for name in self.conformation_names: - # propka.output.writePKA(self, self.version.parameters, filename='%s_3.1_%s.pka'%(self.name, name), - # conformation=name,reference=reference, - # direction=direction, options=options) + def write_pka(self, filename=None, reference="neutral", + direction="folding", options=None): + """Write pKa information to a file. + Args: + filename: file to write to + reference: reference state + direction: folding vs. unfolding + options: options object + """ # write out the average conformation - filename=os.path.join('%s.pka'%(self.name)) - - # if the display_coupled_residues option is true, - # write the results out to an alternative pka file + filename = os.path.join('%s.pka' % (self.name)) + # if the display_coupled_residues option is true, write the results out + # to an alternative pka file if self.options.display_coupled_residues: - filename=os.path.join('%s_alt_state.pka'%(self.name)) - - if hasattr(self.version.parameters, 'output_file_tag') and len(self.version.parameters.output_file_tag)>0: - filename=os.path.join('%s_%s.pka'%(self.name,self.version.parameters.output_file_tag)) - + filename = os.path.join('%s_alt_state.pka' % (self.name)) + if hasattr(self.version.parameters, 'output_file_tag') \ + and len(self.version.parameters.output_file_tag) > 0: + filename = os.path.join('%s_%s.pka' % (self.name, + self.version.parameters.output_file_tag)) propka.output.writePKA(self, self.version.parameters, filename=filename, - conformation='AVR',reference=reference, + conformation='AVR', reference=reference, direction=direction, options=options) - return + def get_folding_profile(self, conformation='AVR', reference="neutral", + grid=[0., 14., 0.1]): + """Get a folding profile. - def getFoldingProfile(self, conformation='AVR',reference="neutral", direction="folding", grid=[0., 14., 0.1], options=None): + Args: + conformation: conformation to select + reference: reference state + direction: folding direction (folding) + grid: the grid of pH values [min, max, step_size] + options: options object + Returns: + TODO - figure out what these are + 1. profile + 2. opt + 3. range_80pct + 4. stability_range + """ # calculate stability profile profile = [] for ph in propka.lib.make_grid(*grid): - ddg = self.conformations[conformation].calculate_folding_energy( ph=ph, reference=reference) - #info(ph,ddg) + conf = self.conformations[conformation] + ddg = conf.calculate_folding_energy(ph=ph, reference=reference) profile.append([ph, ddg]) - # find optimum - opt =[None, 1e6] + opt = [None, 1e6] for point in profile: - opt = min(opt, point, key=lambda v:v[1]) - + opt = min(opt, point, key=lambda v: v[1]) # find values within 80 % of optimum range_80pct = [None, None] - values_within_80pct = [p[0] for p in profile if p[1]< 0.8*opt[1]] - if len(values_within_80pct)>0: + values_within_80pct = [p[0] for p in profile if p[1] < 0.8*opt[1]] + if len(values_within_80pct) > 0: range_80pct = [min(values_within_80pct), max(values_within_80pct)] - # find stability range stability_range = [None, None] - stable_values = [p[0] for p in profile if p[1]< 0.0] - - if len(stable_values)>0: + stable_values = [p[0] for p in profile if p[1] < 0.0] + if len(stable_values) > 0: stability_range = [min(stable_values), max(stable_values)] - return profile, opt, range_80pct, stability_range + def get_charge_profile(self, conformation='AVR', grid=[0., 14., .1]): + """Get charge profile for conformation as function of pH. - def getChargeProfile(self, conformation='AVR', grid=[0., 14., .1]): + Args: + conformation: conformation to test + grid: grid of pH values [min, max, step] + Returns: + list of charge state values + """ charge_profile = [] for ph in propka.lib.make_grid(*grid): - q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, ph=ph) + conf = self.conformations[conformation] + q_unfolded, q_folded = conf.calculate_charge(self.version.parameters, + ph=ph) charge_profile.append([ph, q_unfolded, q_folded]) - return charge_profile - def getPI(self, conformation='AVR', grid=[0., 14., 1], iteration=0): - #info('staring',grid, iteration) - # search - charge_profile = self.getChargeProfile(conformation=conformation, grid=grid) - pi = [] - pi_folded = pi_unfolded = [None, 1e6,1e6] - for point in charge_profile: - pi_folded = min(pi_folded, point, key=lambda v:abs(v[2])) - pi_unfolded = min(pi_unfolded, point, key=lambda v:abs(v[1])) + def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0): + """Get the isoelectric points for folded and unfolded states. - # If results are not good enough, do it again with a higher sampling resolution - pi_folded_value = pi_folded[0] + Args: + conformation: conformation to test + grid: grid of pH values [min, max, step] + iteration: iteration number of process + Returns: + 1. Folded state PI + 2. Unfolded state PI + """ + charge_profile = self.get_charge_profile(conformation=conformation, + grid=grid) + pi_folded = pi_unfolded = [None, 1e6, 1e6] + for point in charge_profile: + pi_folded = min(pi_folded, point, key=lambda v: abs(v[2])) + pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1])) + # If results are not good enough, do it again with a higher sampling + # resolution + pi_folded_value = pi_folded[0] pi_unfolded_value = pi_unfolded[0] step = grid[2] - if (pi_folded[2] > 0.01 or pi_unfolded[1] > 0.01) and iteration<4: - pi_folded_value, x = self.getPI(conformation=conformation, grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], iteration=iteration+1) - x, pi_unfolded_value = self.getPI(conformation=conformation, grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], iteration=iteration+1) - - + # TODO - need to warn if maximum number of iterations is exceeded + if (pi_folded[2] > UNK_PI_CUTOFF or pi_unfolded[1] > UNK_PI_CUTOFF) \ + and iteration < MAX_ITERATION: + pi_folded_value, _ = self.get_pi(conformation=conformation, + grid=[pi_folded[0]-step, + pi_folded[0]+step, + step/10.0], + iteration=iteration+1) + _, pi_unfolded_value = self.get_pi(conformation=conformation, + grid=[pi_unfolded[0]-step, + pi_unfolded[0]+step, + step/10.0], + iteration=iteration+1) return pi_folded_value, pi_unfolded_value - - - -if __name__ == '__main__': - input_file = sys.argv[1] - mc = Molecular_container(input_file) diff --git a/propka/output.py b/propka/output.py index 6e15d32..93c0e05 100644 --- a/propka/output.py +++ b/propka/output.py @@ -76,10 +76,10 @@ def writePKA(protein, parameters, filename=None, conformation ='1A',reference="n str += "%s\n" % ( getTheLine() ) # printing Folding Profile - str += getFoldingProfileSection(protein, conformation=conformation, reference=reference, direction=direction, window=[0., 14., 1.0], options=options) + str += get_folding_profileSection(protein, conformation=conformation, reference=reference, window=[0., 14., 1.0]) # printing Protein Charge Profile - str += getChargeProfileSection(protein, conformation=conformation) + str += get_charge_profileSection(protein, conformation=conformation) # now, writing the pka text to file file.write(str) @@ -156,7 +156,7 @@ def getSummarySection(protein, conformation, parameters): return str -def getFoldingProfileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): +def get_folding_profileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): """ returns the protein-folding-profile section """ @@ -164,9 +164,9 @@ def getFoldingProfileSection(protein, conformation='AVR', direction="folding", r str += "\n" str += "Free energy of %9s (kcal/mol) as a function of pH (using %s reference)\n" % (direction, reference) - profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.getFoldingProfile(conformation=conformation, + profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.get_folding_profile(conformation=conformation, reference=reference, - direction=direction, grid=[0., 14., 0.1], options=options) + grid=[0., 14., 0.1]) if profile == None: str += "Could not determine folding profile\n" else: @@ -195,13 +195,13 @@ def getFoldingProfileSection(protein, conformation='AVR', direction="folding", r -def getChargeProfileSection(protein, conformation='AVR', options=None): +def get_charge_profileSection(protein, conformation='AVR', options=None): """ returns the protein-folding-profile section """ str = "Protein charge of folded and unfolded state as a function of pH\n" - profile = protein.getChargeProfile(conformation=conformation,grid=[0., 14., 1.]) + profile = protein.get_charge_profile(conformation=conformation,grid=[0., 14., 1.]) if profile == None: str += "Could not determine charge profile\n" else: @@ -210,7 +210,7 @@ def getChargeProfileSection(protein, conformation='AVR', options=None): str += "%6.2lf%10.2lf%8.2lf\n" % (pH, Q_mod, Q_pro) - pI_pro, pI_mod = protein.getPI(conformation=conformation) + pI_pro, pI_mod = protein.get_pi(conformation=conformation) if pI_pro == None or pI_mod == None: str += "Could not determine the pI\n\n" else: From 3342d3418cb0eeb7eea8db3d525ace3dfc28d6ac Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 07:59:55 -0700 Subject: [PATCH 22/65] Document need to change Molecular_container name. --- propka/molecular_container.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/propka/molecular_container.py b/propka/molecular_container.py index 092989d..bd544b2 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -17,7 +17,11 @@ MAX_ITERATION = 4 class Molecular_container: - """Container for storing molecular contents of PDB files.""" + """Container for storing molecular contents of PDB files. + + TODO - this class name does not conform to PEP8 but has external use. + We should deprecate and change eventually. + """ def __init__(self, input_file, options=None): """Initialize molecular container. @@ -27,7 +31,7 @@ class Molecular_container: options: options object """ # printing out header before parsing input - propka.output.printHeader() + propka.output.print_header() # set up some values self.options = options self.input_file = input_file @@ -126,7 +130,7 @@ class Molecular_container: # find the average of the conformations self.average_of_conformations() # print out the conformation-average results - propka.output.printResult(self, 'AVR', self.version.parameters) + propka.output.print_result(self, 'AVR', self.version.parameters) def average_of_conformations(self): """Generate an average of conformations.""" @@ -179,7 +183,7 @@ class Molecular_container: and len(self.version.parameters.output_file_tag) > 0: filename = os.path.join('%s_%s.pka' % (self.name, self.version.parameters.output_file_tag)) - propka.output.writePKA(self, self.version.parameters, filename=filename, + propka.output.write_pka(self, self.version.parameters, filename=filename, conformation='AVR', reference=reference, direction=direction, options=options) From f38faaed970403c9a566e11d9f6c299ca8f98ffa Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 08:24:54 -0700 Subject: [PATCH 23/65] De-lint output.py. Public methods/members changed; dependencies checked via Google. --- propka/molecular_container.py | 3 +- propka/output.py | 670 ++++++++++++++++++---------------- propka/protonate.py | 4 +- 3 files changed, 365 insertions(+), 312 deletions(-) diff --git a/propka/molecular_container.py b/propka/molecular_container.py index bd544b2..76768c0 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -184,8 +184,7 @@ class Molecular_container: filename = os.path.join('%s_%s.pka' % (self.name, self.version.parameters.output_file_tag)) propka.output.write_pka(self, self.version.parameters, filename=filename, - conformation='AVR', reference=reference, - direction=direction, options=options) + conformation='AVR', reference=reference) def get_folding_profile(self, conformation='AVR', reference="neutral", grid=[0., 14., 0.1]): diff --git a/propka/output.py b/propka/output.py index 93c0e05..8419cb6 100644 --- a/propka/output.py +++ b/propka/output.py @@ -1,397 +1,451 @@ - -from __future__ import division -from __future__ import print_function - -import sys - -import propka.lib -from propka.lib import info, warning +"""Output routines.""" +from datetime import date +from propka.lib import info -def printHeader(): +def print_header(): + """Print header section of output.""" + str_ = "%s\n" % get_propka_header() + str_ += "%s\n" % get_references_header() + str_ += "%s\n" % get_warning_header() + info(str_) + + +def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False, + _=None): + """Write a residue to the new PDB file. + + Args: + protein: protein object + pdbfile: PDB file + filename: file to write to + include_hydrogens: Boolean indicating whether to include hydrogens + options: options object """ - prints the header section - """ - str = "%s\n" % ( getPropkaHeader() ) - str += "%s\n" % ( getReferencesHeader() ) - str += "%s\n" % ( getWarningHeader() ) - - info(str) - - -def writePDB(protein, file=None, filename=None, include_hydrogens=False, options=None): - """ - Write the residue to the new pdbfile - """ - - if file == None: - # opening file if not given - if filename == None: - filename = "%s.pdb" % (protein.name) - file = open(filename, 'w') - info("writing pdbfile %s" % (filename)) - close_file = True + if pdbfile is None: + # opening file if not given + if filename is None: + filename = "%s.pdb" % (protein.name) + # TODO - this would be better as a context manager + pdbfile = open(filename, 'w') + info("writing pdbfile %s" % (filename)) + close_file = True else: - # don't close the file, it was opened in a different place - close_file = False - + # don't close the file, it was opened in a different place + close_file = False numb = 0 for chain in protein.chains: - for residue in chain.residues: - if residue.res_name not in ["N+ ", "C- "]: - for atom in residue.atoms: - if include_hydrogens == False and atom.name[0] == "H": - """ don't print """ - else: - numb += 1 - line = atom.make_pdb_line2(numb=numb) - line += "\n" - file.write(line) - - if close_file == True: - file.close() + for residue in chain.residues: + if residue.res_name not in ["N+ ", "C- "]: + for atom in residue.atoms: + if (not include_hydrogens) and atom.name[0] == "H": + # don't print + pass + else: + numb += 1 + line = atom.make_pdb_line2(numb=numb) + line += "\n" + pdbfile.write(line) + if close_file: + pdbfile.close() -def writePKA(protein, parameters, filename=None, conformation ='1A',reference="neutral", direction="folding", verbose=False, options=None): - """ - Write the pka-file based on the given protein +def write_pka(protein, parameters, filename=None, conformation='1A', + reference="neutral", _="folding", verbose=False, + __=None): + """Write the pKa-file based on the given protein. + + Args: + protein: protein object + filename: output file name + conformation: TODO - figure this out + reference: reference state + _: "folding" or other + verbose: Boolean flag for verbosity + __: options object """ + # TODO - the code immediately overrides the verbose argument; why? verbose = True - if filename == None: - filename = "%s.pka" % (protein.name) - file = open(filename, 'w') - if verbose == True: - info("Writing %s" % (filename)) - + if filename is None: + filename = "%s.pka" % (protein.name) + # TODO - this would be much better with a context manager + file_ = open(filename, 'w') + if verbose: + info("Writing %s" % (filename)) # writing propka header - str = "%s\n" % ( getPropkaHeader() ) - str += "%s\n" % ( getReferencesHeader() ) - str += "%s\n" % ( getWarningHeader() ) - + str_ = "%s\n" % get_propka_header() + str_ += "%s\n" % get_references_header() + str_ += "%s\n" % get_warning_header() # writing pKa determinant section - str += getDeterminantSection(protein,conformation, parameters) - + str_ += get_determinant_section(protein, conformation, parameters) # writing pKa summary section - str += getSummarySection(protein,conformation,parameters) - str += "%s\n" % ( getTheLine() ) - + str_ += get_summary_section(protein, conformation, parameters) + str_ += "%s\n" % get_the_line() # printing Folding Profile - str += get_folding_profileSection(protein, conformation=conformation, reference=reference, window=[0., 14., 1.0]) - + str_ += get_folding_profile_section(protein, conformation=conformation, + reference=reference, + window=[0., 14., 1.0]) # printing Protein Charge Profile - str += get_charge_profileSection(protein, conformation=conformation) - + str_ += get_charge_profile_section(protein, conformation=conformation) # now, writing the pka text to file - file.write(str) - - file.close() + file_.write(str_) + file_.close() -def printTmProfile(protein, reference="neutral", window=[0., 14., 1.], Tm=[0.,0.], Tms=None, ref=None, verbose=False, options=None): +def print_tm_profile(protein, reference="neutral", window=[0., 14., 1.], + __=[0., 0.], tms=None, ref=None, _=False, + options=None): + """Print Tm profile. + + I think Tm refers to the denaturation temperature. + + Args: + protein: protein object + reference: reference state + window: pH window [min, max, step] + __: temperature range [min, max] + tms: TODO - figure this out + ref: TODO - figure this out (probably reference state?) + _: Boolean for verbosity + options: options object """ - prints Tm profile - """ - profile = protein.getTmProfile(reference=reference, grid=[0., 14., 0.1], Tms=Tms, ref=ref, options=options) - if profile == None: - str = "Could not determine Tm-profile\n" + profile = protein.getTmProfile(reference=reference, grid=[0., 14., 0.1], + tms=tms, ref=ref, options=options) + if profile is None: + str_ = "Could not determine Tm-profile\n" else: - str = " suggested Tm-profile for %s\n" % (protein.name) - for (pH, Tm) in profile: - if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.01 or pH%window[2] > 0.99*window[2]): - str += "%6.2lf%10.2lf\n" % (pH, Tm) - info(str) + str_ = " suggested Tm-profile for %s\n" % (protein.name) + for (ph, tm_) in profile: + if ph >= window[0] and ph <= window[1] and (ph%window[2] < 0.01 \ + or ph%window[2] > 0.99*window[2]): + str_ += "%6.2lf%10.2lf\n" % (ph, tm_) + info(str_) -def printResult(protein, conformation, parameters): +def print_result(protein, conformation, parameters): + """Prints all resulting output from determinants and down. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters """ - prints all resulting output from determinants and down - """ - printPKASection(protein, conformation, parameters) + print_pka_section(protein, conformation, parameters) -def printPKASection(protein, conformation, parameters): - """ - prints out the pka-section of the result +def print_pka_section(protein, conformation, parameters): + """Prints out pKa section of results. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters """ # geting the determinants section - str = getDeterminantSection(protein, conformation, parameters) - info(str) - - str = getSummarySection(protein,conformation,parameters) - info(str) + str_ = get_determinant_section(protein, conformation, parameters) + info(str_) + str_ = get_summary_section(protein, conformation, parameters) + info(str_) -def getDeterminantSection(protein, conformation, parameters): - """ - prints out the pka-section of the result +def get_determinant_section(protein, conformation, parameters): + """Returns string with determinant section of results. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters + Returns: + string """ # getting the same order as in propka2.0 - str = "%s\n" % ( getDeterminantsHeader() ) + str_ = "%s\n" % get_determinants_header() # printing determinants for chain in protein.conformations[conformation].chains: for residue_type in parameters.write_out_order: - groups = [g for g in protein.conformations[conformation].groups if g.atom.chain_id == chain] + groups = [g for g in protein.conformations[conformation].groups \ + if g.atom.chain_id == chain] for group in groups: if group.residue_type == residue_type: - str += "%s" % ( group.get_determinant_string(parameters.remove_penalised_group) ) - + str_ += "%s" \ + % group.get_determinant_string(parameters.remove_penalised_group) # Add a warning in case of coupled residues - if protein.conformations[conformation].non_covalently_coupled_groups and not protein.options.display_coupled_residues: - str += 'Coupled residues (marked *) were detected. Please rerun PropKa with the --display-coupled-residues \nor -d option for detailed information.\n' - - return str + if protein.conformations[conformation].non_covalently_coupled_groups \ + and not protein.options.display_coupled_residues: + str_ += 'Coupled residues (marked *) were detected.' + str_ += 'Please rerun PropKa with the --display-coupled-residues \n' + str_ += 'or -d option for detailed information.\n' + return str_ -def getSummarySection(protein, conformation, parameters): +def get_summary_section(protein, conformation, parameters): + """Returns string with summary section of the results. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters + Returns: + string """ - prints out the pka-section of the result - """ - str = "%s\n" % ( getSummaryHeader() ) + str_ = "%s\n" % get_summary_header() # printing pKa summary for residue_type in parameters.write_out_order: for group in protein.conformations[conformation].groups: - if group.residue_type == residue_type: - str += "%s" % ( group.get_summary_string(parameters.remove_penalised_group) ) - - return str + if group.residue_type == residue_type: + str_ += "%s" \ + % group.get_summary_string(parameters.remove_penalised_group) + return str_ -def get_folding_profileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): +def get_folding_profile_section(protein, conformation='AVR', + direction="folding", reference="neutral", + window=[0., 14., 1.0], _=False, + __=None): + """Returns string with the folding profile section of the results. + + Args: + protein: protein object + conformation: specific conformation + direction: 'folding' or other + reference: reference state + window: pH window [min, max, step] + _: Boolean for verbose output + __: options object + Returns: + string """ - returns the protein-folding-profile section - """ - str = getTheLine() - str += "\n" - str += "Free energy of %9s (kcal/mol) as a function of pH (using %s reference)\n" % (direction, reference) - - profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.get_folding_profile(conformation=conformation, - reference=reference, - grid=[0., 14., 0.1]) - if profile == None: - str += "Could not determine folding profile\n" + str_ = get_the_line() + str_ += "\n" + str_ += "Free energy of %9s (kcal/mol) as a function" % direction + str_ += " of pH (using %s reference)\n" % reference + profile, [ph_opt, dg_opt], [dg_min, dg_max], [ph_min, ph_max] \ + = protein.get_folding_profile(conformation=conformation, + reference=reference, grid=[0., 14., 0.1]) + if profile is None: + str_ += "Could not determine folding profile\n" else: - for (pH, dG) in profile: - if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.05 or pH%window[2] > 0.95): - str += "%6.2lf%10.2lf\n" % (pH, dG) - str += "\n" - - if pH_opt == None or dG_opt == None: - str += "Could not determine pH optimum\n" + for (ph, dg) in profile: + if ph >= window[0] and ph <= window[1]: + if ph%window[2] < 0.05 or ph%window[2] > 0.95: + str_ += "%6.2lf%10.2lf\n" % (ph, dg) + str_ += "\n" + if ph_opt is None or dg_opt is None: + str_ += "Could not determine pH optimum\n" else: - str += "The pH of optimum stability is %4.1lf for which the free energy is%6.1lf kcal/mol at 298K\n" % (pH_opt, dG_opt) - - if dG_min == None or dG_max == None: - str += "Could not determine pH values where the free energy is within 80 %s of minimum\n" % ("%") + str_ += "The pH of optimum stability is %4.1lf" % ph_opt + str_ += " for which the free energy is %6.1lf kcal/mol at 298K\n" % dg_opt + if dg_min is None or dg_max is None: + str_ += "Could not determine pH values where the free energy" + str_ += " is within 80 %s of minimum\n" % ("%") else: - str += "The free energy is within 80 %s of maximum at pH %4.1lf to %4.1lf\n" % ("%", dG_min, dG_max) - - if pH_min == None or pH_max == None: - str += "Could not determine the pH-range where the free energy is negative\n\n" + str_ += "The free energy is within 80 \% of maximum" + str_ += " at pH %4.1lf to %4.1lf\n" % (dg_min, dg_max) + if ph_min is None or ph_max is None: + str_ += "Could not determine the pH-range where the free" + str_ += " energy is negative\n\n" else: - str += "The free energy is negative in the range %4.1lf - %4.1lf\n\n" % (pH_min, pH_max) + str_ += "The free energy is negative in the range" + str_ += " %4.1lf - %4.1lf\n\n" % (ph_min, ph_max) + return str_ - return str +def get_charge_profile_section(protein, conformation='AVR', _=None): + """Returns string with the charge profile section of the results. - - -def get_charge_profileSection(protein, conformation='AVR', options=None): + Args: + protein: protein object + conformation: specific conformation + _: options object + Returns: + string """ - returns the protein-folding-profile section - """ - str = "Protein charge of folded and unfolded state as a function of pH\n" - - profile = protein.get_charge_profile(conformation=conformation,grid=[0., 14., 1.]) - if profile == None: - str += "Could not determine charge profile\n" + str_ = "Protein charge of folded and unfolded state as a function of pH\n" + profile = protein.get_charge_profile(conformation=conformation, + grid=[0., 14., 1.]) + if profile is None: + str_ += "Could not determine charge profile\n" else: - str += "%6s%10s%8s\n" % ("pH", "unfolded", "folded") - for (pH, Q_mod, Q_pro) in profile: - str += "%6.2lf%10.2lf%8.2lf\n" % (pH, Q_mod, Q_pro) - - - pI_pro, pI_mod = protein.get_pi(conformation=conformation) - if pI_pro == None or pI_mod == None: - str += "Could not determine the pI\n\n" + str_ += "%6s%10s%8s\n" % ("pH", "unfolded", "folded") + for (ph, q_mod, q_pro) in profile: + str_ += "%6.2lf%10.2lf%8.2lf\n" % (ph, q_mod, q_pro) + pi_pro, pi_mod = protein.get_pi(conformation=conformation) + if pi_pro is None or pi_mod is None: + str_ += "Could not determine the pI\n\n" else: - str += "The pI is %5.2lf (folded) and %5.2lf (unfolded)\n" % (pI_pro, pI_mod) + str_ += "The pI is %5.2lf (folded) and %5.2lf (unfolded)\n" % (pi_pro, + pi_mod) + return str_ - return str +def write_jackal_scap_file(mutation_data=None, filename="1xxx_scap.list", + _=None): + """Write a scap file for, i.e., generating a mutated protein - -def writeJackalScapFile(mutationData=None, filename="1xxx_scap.list", options=None): + TODO - figure out what this is """ - writing a scap file for, i.e., generating a mutated protein + with open(filename, 'w') as file_: + for chain_id, _, res_num, code2 in mutation_data: + str_ = "%s, %d, %s\n" % (chain_id, res_num, code2) + file_.write(str_) + + +def write_scwrl_sequence_file(sequence, filename="x-ray.seq", _=None): + """Write a scwrl sequence file for, e.g., generating a mutated protein + + TODO - figure out what this is """ - file = open(filename, 'w') - - for chain_id, code1, res_num, code2 in mutationData: - str = "%s, %d, %s\n" % (chain_id, res_num, code2) - file.write(str) - file.close() + with open(filename, 'w') as file_: + start = 0 + while len(sequence[start:]) > 60: + file_.write("%s\n" % (sequence[start:start+60])) + start += 60 + file_.write("%s\n" % (sequence[start:])) -def writeScwrlSequenceFile(sequence, filename="x-ray.seq", options=None): +def get_propka_header(): + """Create the header. + + Returns: + string """ - writing a scwrl sequence file for, e.g., generating a mutated protein - """ - file = open(filename, 'w') - - start = 0 - while len(sequence[start:]) > 60: - file.write( "%s\n" % (sequence[start:start+60]) ) - start += 60 - file.write( "%s\n" % (sequence[start:]) ) - - file.close() - - - -# --- various header text --- # - - -def getPropkaHeader(): - """ - Creates the header - """ - from datetime import date today = date.today() + str_ = "propka3.1 %93s\n" % (today) + str_ += "-------------------------------------------------------------------------------------------------------\n" + str_ += "-- --\n" + str_ += "-- PROPKA: A PROTEIN PKA PREDICTOR --\n" + str_ += "-- --\n" + str_ += "-- VERSION 1.0, 04/25/2004, IOWA CITY --\n" + str_ += "-- BY HUI LI --\n" + str_ += "-- --\n" + str_ += "-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --\n" + str_ += "-- BY DELPHINE C. BAS AND DAVID M. ROGERS --\n" + str_ += "-- --\n" + str_ += "-- VERSION 3.0, 01/06/2011, COPENHAGEN --\n" + str_ += "-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --\n" + str_ += "-- --\n" + str_ += "-- VERSION 3.1, 07/01/2011, COPENHAGEN --\n" + str_ += "-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --\n" + str_ += "-------------------------------------------------------------------------------------------------------\n" + str_ += "\n" + return str_ +def get_references_header(): + """Create the 'references' part of output file. - str = "propka3.1 %93s\n" % (today) - str += "-------------------------------------------------------------------------------------------------------\n" - str += "-- --\n" - str += "-- PROPKA: A PROTEIN PKA PREDICTOR --\n" - str += "-- --\n" - str += "-- VERSION 1.0, 04/25/2004, IOWA CITY --\n" - str += "-- BY HUI LI --\n" - str += "-- --\n" - str += "-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --\n" - str += "-- BY DELPHINE C. BAS AND DAVID M. ROGERS --\n" - str += "-- --\n" - str += "-- VERSION 3.0, 01/06/2011, COPENHAGEN --\n" - str += "-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --\n" - str += "-- --\n" - str += "-- VERSION 3.1, 07/01/2011, COPENHAGEN --\n" - str += "-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --\n" - str += "-------------------------------------------------------------------------------------------------------\n" - str += "\n" - - - - return str - - -def getReferencesHeader(): + Returns: + string """ - Returns the 'references' part in output file + str_ = "" + str_ += "-------------------------------------------------------------------------------------------------------\n" + str_ += " References:\n" + str_ += "\n" + str_ += " Very Fast Empirical Prediction and Rationalization of Protein pKa Values\n" + str_ += " Hui Li, Andrew D. Robertson and Jan H. Jensen\n" + str_ += " PROTEINS: Structure, Function, and Bioinformatics 61:704-721 (2005)\n" + str_ += " \n" + str_ += " Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand Complexes\n" + str_ += " Delphine C. Bas, David M. Rogers and Jan H. Jensen\n" + str_ += " PROTEINS: Structure, Function, and Bioinformatics 73:765-783 (2008)\n" + str_ += " \n" + str_ += " PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical pKa predictions\n" + str_ += " Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, and Jan H. Jensen\n" + str_ += " Journal of Chemical Theory and Computation, 7(2):525-537 (2011)\n" + str_ += " \n" + str_ += " Improved Treatment of Ligands and Coupling Effects in Empirical Calculation\n" + str_ += " and Rationalization of pKa Values\n" + str_ += " Chresten R. Sondergaard, Mats H.M. Olsson, Michal Rostkowski, and Jan H. Jensen\n" + str_ += " Journal of Chemical Theory and Computation, (2011)\n" + str_ += " \n" + str_ += "-------------------------------------------------------------------------------------------------------\n" + return str_ + + +def get_warning_header(): + """Create the 'warning' part of the output file. + + TODO - this function is essentially a no-op. + + Returns: + string """ - - str = "" - str += "-------------------------------------------------------------------------------------------------------\n" - str += " References:\n" - str += "\n" - str += " Very Fast Empirical Prediction and Rationalization of Protein pKa Values\n" - str += " Hui Li, Andrew D. Robertson and Jan H. Jensen\n" - str += " PROTEINS: Structure, Function, and Bioinformatics 61:704-721 (2005)\n" - str += " \n" - str += " Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand Complexes\n" - str += " Delphine C. Bas, David M. Rogers and Jan H. Jensen\n" - str += " PROTEINS: Structure, Function, and Bioinformatics 73:765-783 (2008)\n" - str += " \n" - str += " PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical pKa predictions\n" - str += " Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, and Jan H. Jensen\n" - str += " Journal of Chemical Theory and Computation, 7(2):525-537 (2011)\n" - str += " \n" - str += " Improved Treatment of Ligands and Coupling Effects in Empirical Calculation\n" - str += " and Rationalization of pKa Values\n" - str += " Chresten R. Sondergaard, Mats H.M. Olsson, Michal Rostkowski, and Jan H. Jensen\n" - str += " Journal of Chemical Theory and Computation, (2011)\n" - str += " \n" - str += "-------------------------------------------------------------------------------------------------------\n" - - return str + str_ = "" + return str_ -def getWarningHeader(): +def get_determinants_header(): + """Create the Determinant header. + + Returns: + string """ - Returns the 'warning' part in output file + str_ = "" + str_ += "--------- ----- ------ --------------------- -------------- -------------- --------------\n" + str_ += " DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC \n" + str_ += " RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION \n" + str_ += "--------- ----- ------ --------- --------- -------------- -------------- --------------\n" + return str_ + + +def get_summary_header(): + """Create the summary header. + + Returns: + string """ - - str = "" - - return str + str_ = get_the_line() + str_ += "\n" + str_ += "SUMMARY OF THIS PREDICTION\n" + str_ += " Group pKa model-pKa ligand atom-type" + return str_ -def getDeterminantsHeader(): +def get_the_line(): + """Draw the line - Johnny Cash would have been proud - or actually Aerosmith! + + NOTE - Johnny Cash walked the line. + + Returns: + string """ - Creates the Determinant header + str_ = "" + str_ += ("-" * 104) + return str_ + + +def make_interaction_map(name, list_, interaction): + """Print out an interaction map named 'name' of the groups in 'list' + based on the function 'interaction' + + Args: + list_: list of groups + interaction: some sort of function + Returns: + string """ - str = "" - str += "--------- ----- ------ --------------------- -------------- -------------- --------------\n" - str += " DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC \n" - str += " RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION \n" - str += "--------- ----- ------ --------- --------- -------------- -------------- --------------\n" - - return str - - -def getSummaryHeader(): - """ - returns the summary header - """ - str = getTheLine() - str += "\n" - str += "SUMMARY OF THIS PREDICTION\n" - str += " Group pKa model-pKa ligand atom-type" - - return str - - -def getTheLine(): - """ - draw the line - Johnny Cash would have been proud - or actually Aerosmith! - """ - str = "" - for i in range(0, 104): - str += "-" - - return str - - -# Interaction maps -def make_interaction_map(name, list, interaction): - """ Print out an interaction map named 'name' of the groups in 'list' - based on the function 'interaction' """ - # return an empty string, if the list is empty - if len(list)==0: + if len(list_) == 0: return '' - # for long list, use condensed formatting - if len(list)>10: + if len(list_) > 10: res = 'Condensed form:\n' - for i in range(len(list)): - for j in range(i,len(list)): - if interaction(list[i],list[j]): - res += 'Coupling: %9s - %9s\n'%(list[i].label,list[j].label) + for i, group1 in enumerate(list_): + for group2 in list_[i:]: + if interaction(group1, group2): + res += 'Coupling: %9s - %9s\n' % (group1.label, + group2.label) return res - # Name and map header - res = '%s\n%12s'%(name,'') - for g in list: - res += '%9s | '%g.label - + res = '%s\n%12s' % (name, '') + for group in list_: + res += '%9s | ' % group.label # do the map - for g1 in list: - res += '\n%-12s'%(g1.label) - for g2 in list: + for group1 in list_: + res += '\n%-12s' % (group1.label) + for group2 in list_: tag = '' - if interaction(g1, g2): + if interaction(group1, group2): tag = ' X ' res += '%10s| '%tag - return res - diff --git a/propka/protonate.py b/propka/protonate.py index cef3216..f6ff7e6 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -433,9 +433,9 @@ if __name__ == '__main__': my_protein = protein.Protein(pdblist,'test.pdb') p.remove_all_hydrogen_atoms_from_protein(my_protein) - my_protein.writePDB('before_protonation.pdb') + my_protein.write_pdb('before_protonation.pdb') p.protonate_protein(my_protein) ## write out protonated file - my_protein.writePDB('protonated.pdb') + my_protein.write_pdb('protonated.pdb') From c7c626791a895d6868a1a78a95e8e08d2330bb7c Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 09:55:09 -0700 Subject: [PATCH 24/65] Remove duplicate propka.cfg. --- propka.cfg | 398 ----------------------------------------------------- 1 file changed, 398 deletions(-) delete mode 100644 propka.cfg diff --git a/propka.cfg b/propka.cfg deleted file mode 100644 index 58b1aec..0000000 --- a/propka.cfg +++ /dev/null @@ -1,398 +0,0 @@ -# PropKa configuration file - -version version_A - -# Model pKa values -model_pkas C- 3.20 -model_pkas ASP 3.80 -model_pkas GLU 4.50 -model_pkas HIS 6.50 -model_pkas CYS 9.00 -model_pkas TYR 10.00 -model_pkas LYS 10.50 -model_pkas ARG 12.50 -#model_pkas SER 14.20 Jack Kyte: Structure in Protein Chemistry, 1995, Garland Publishing, Inc New York and London -model_pkas N+ 8.00 -model_pkas CG 11.50 -model_pkas C2N 11.50 -model_pkas N30 10.00 -model_pkas N31 10.00 -model_pkas N32 10.00 -model_pkas N33 10.00 -model_pkas NAR 5.00 -model_pkas OCO 4.50 -model_pkas SH 10.00 -model_pkas OP 6.00 - -# Custom ligand pKa values -# P. Acharya, P. Cheruku, S. Chatterjee, S. Acharya, and, J. Chattopadhyaya: -# Measurement of Nucleobase pKa Values in Model Mononucleotides -# Shows RNA-RNA Duplexes To Be More Stable than DNA-DNA Duplexes -# Journal of the American Chemical Society 2004 126 (9), 2862-2869 -# -custom_model_pkas DA-N1 3.82 -custom_model_pkas DA-N3 3.82 -custom_model_pkas DA-N7 3.82 -custom_model_pkas DA-OP1 1.00 -custom_model_pkas DA-OP2 1.00 - -custom_model_pkas DG-N1 9.59 -custom_model_pkas DG-N3 9.59 -custom_model_pkas DG-N7 9.59 -custom_model_pkas DG-OP1 1.00 -custom_model_pkas DG-OP2 1.00 - -custom_model_pkas DC-N3 4.34 -custom_model_pkas DC-OP1 1.00 -custom_model_pkas DC-OP2 1.00 - -custom_model_pkas DT-N3 10.12 -custom_model_pkas DT-OP1 1.00 -custom_model_pkas DT-OP2 1.00 - - -# protein group mapping -protein_group_mapping ASP-CG COO -protein_group_mapping GLU-CD COO -protein_group_mapping HIS-CG HIS -protein_group_mapping CYS-SG CYS -protein_group_mapping TYR-OH TYR -protein_group_mapping LYS-NZ LYS -protein_group_mapping ARG-CZ ARG -#protein_group_mapping SER-OG SER -protein_group_mapping THR-OG1 ROH -protein_group_mapping SER-OG ROH# -protein_group_mapping ASN-CG AMD -protein_group_mapping GLN-CD AMD -protein_group_mapping TRP-NE1 TRP - - -# matrix for propka interactions -# 'N' non-iterative interaction -# 'I' iterative interaction -# '-' no interaction - #CYS -interaction_matrix CYS I#N+ -interaction_matrix N+ N I#HIS -interaction_matrix HIS I N I#LYS -interaction_matrix LYS N N N I#AMD -interaction_matrix AMD N - N - -#COO -interaction_matrix COO I N I N N I#ARG -interaction_matrix ARG N N N N - N I#TRP -interaction_matrix TRP N - - - - N - -#ROH -interaction_matrix ROH N - - - - N - - -#TYR -interaction_matrix TYR N I I I N N N N N I#SER -interaction_matrix SER N N N N N N I N N N I #CG -interaction_matrix CG N N N N - N I - - N I I#C2N -interaction_matrix C2N N N N N - N I - - N I I I#N30 -interaction_matrix N30 N I N N - N N - - I N I I I#N31 -interaction_matrix N31 N I N N - N N - - I N I I I I#N32 -interaction_matrix N32 N I N N - N N - - I N I I I I I#N33 -interaction_matrix N33 N I N N - N N - - I N I I I I I I#NAR -interaction_matrix NAR I N I I N I N - - I N N N N N N N I#OCO -interaction_matrix OCO I N I N N I N N N N N N N N N N N I I#NP1 -interaction_matrix NP1 N - N - - N - - - N N - - - - - - N N -#OH -interaction_matrix OH N - - - - N - - - N N - - - - - - - N - -#O3 -interaction_matrix O3 N - N - - N - - - N N - - - - - - N N - - -#CL -interaction_matrix CL N - N - - N - - - N N - - - - - - N N - - - -#F -interaction_matrix F N - N - - N - - - N N - - - - - - N N - - - - -#NAM -interaction_matrix NAM N - N - - N - - - N N - - - - - - N N - - - - - -#N1 -interaction_matrix N1 N - N - - N - - - N N - - - - - - N N - - - - - - -#O2 -interaction_matrix O2 N - N - - N - - - N N - - - - - - N N - - - - - - - -#OP -interaction_matrix OP I N I N N I N N N N N N N N N N N I I N N N N N N N N I#SH -interaction_matrix SH I N N N N N N N N N N I I I I I I N N N N N N N N N N N I - -# Cutoff values for side chain interactions -# default value -sidechain_cutoffs default 3.0 4.0 -# COO -sidechain_cutoffs COO COO 2.5 3.5 -Sidechain_cutoffs COO SER 2.65 3.65 -sidechain_cutoffs COO ARG 1.85 2.85 -sidechain_cutoffs COO LYS 2.85 3.85 -sidechain_cutoffs COO HIS 2.0 3.0 -sidechain_cutoffs COO AMD 2.0 3.0 -sidechain_cutoffs COO TRP 2.0 3.0 -sidechain_cutoffs COO ROH 2.65 3.65 -sidechain_cutoffs COO TYR 2.65 3.65 -sidechain_cutoffs COO N+ 2.85 3.85 -sidechain_cutoffs COO CG 1.85 2.85 -sidechain_cutoffs COO C2N 1.85 2.85 -sidechain_cutoffs COO N30 2.85 3.85 -sidechain_cutoffs COO N31 2.85 3.85 -sidechain_cutoffs COO N32 2.85 3.85 -sidechain_cutoffs COO N33 2.85 3.85 -sidechain_cutoffs COO NAR 2.0 3.0 -sidechain_cutoffs COO OCO 2.5 3.5 -sidechain_cutoffs COO OH 2.65 3.65 -sidechain_cutoffs COO NAM 2.0 3.0 -# SER -sidechain_cutoffs SER SER 3.5 4.5 -sidechain_cutoffs SER ARG 2.5 4.0 -sidechain_cutoffs SER HIS 2.0 3.0 -sidechain_cutoffs SER AMD 2.5 3.5 -sidechain_cutoffs SER CYS 3.5 4.5 -sidechain_cutoffs SER TRP 2.5 3.5 -sidechain_cutoffs SER ROH 3.5 4.5 -sidechain_cutoffs SER CG 2.5 4.0 -sidechain_cutoffs SER C2N 2.5 4.0 -sidechain_cutoffs SER NAR 2.0 3.0 -sidechain_cutoffs SER OH 3.5 4.5 -sidechain_cutoffs SER SH 3.5 4.5 -sidechain_cutoffs SER TYR 3.5 4.5 -sidechain_cutoffs SER N+ 3.0 4.5 -sidechain_cutoffs SER NAM 2.5 3.5 -# ARG -sidechain_cutoffs ARG CYS 2.5 4.0 -sidechain_cutoffs ARG TYR 2.5 4.0 -sidechain_cutoffs ARG OCO 1.85 2.85 -sidechain_cutoffs ARG SH 2.5 4.0 -# HIS -sidechain_cutoffs HIS AMD 2.0 3.0 -sidechain_cutoffs HIS TYR 2.0 3.0 -sidechain_cutoffs HIS OCO 2.0 3.0 -# CYS -sidechain_cutoffs CYS CYS 3.0 5.0 -sidechain_cutoffs CYS TRP 2.5 3.5 -sidechain_cutoffs CYS ROH 3.5 4.5 -sidechain_cutoffs CYS AMD 2.5 3.5 -sidechain_cutoffs CYS TYR 3.5 4.5 -sidechain_cutoffs CYS N+ 3.0 4.5 -sidechain_cutoffs CYS CG 2.5 4.0 -sidechain_cutoffs CYS C2N 2.5 4.0 -sidechain_cutoffs CYS N30 3.0 4.5 -sidechain_cutoffs CYS N31 3.0 4.5 -sidechain_cutoffs CYS N32 3.0 4.5 -sidechain_cutoffs CYS N33 3.0 4.5 -sidechain_cutoffs CYS OH 3.5 4.5 -sidechain_cutoffs CYS NAM 2.5 3.5 -sidechain_cutoffs CYS SH 3.0 5.0 -# TYR -sidechain_cutoffs TYR TYR 3.5 4.5 -sidechain_cutoffs TYR N+ 3.0 4.5 -sidechain_cutoffs TYR AMD 2.5 3.5 -sidechain_cutoffs TYR TRP 2.5 3.5 -sidechain_cutoffs TYR ROH 3.5 4.5 -sidechain_cutoffs TYR CG 2.5 4.0 -sidechain_cutoffs TYR C2N 2.5 4.0 -sidechain_cutoffs TYR OCO 2.65 3.65 -sidechain_cutoffs TYR NAR 2.0 3.0 -sidechain_cutoffs TYR OH 3.5 4.5 -sidechain_cutoffs TYR NAM 2.5 3.5 -sidechain_cutoffs TYR SH 3.5 4.5 -# N+ -sidechain_cutoffs N+ OCO 2.85 3.85 -sidechain_cutoffs N+ SH 3.0 4.5 -# LYS -sidechain_cutoffs LYS OCO 2.85 3.85 -# OCO -sidechain_cutoffs OCO OCO 2.5 3.5 -sidechain_cutoffs OCO TRP 2.0 3.0 -sidechain_cutoffs OCO ROH 2.65 3.65 -sidechain_cutoffs OCO AMD 2.0 3.0 -sidechain_cutoffs OCO CG 1.85 2.85 -sidechain_cutoffs OCO C2N 1.85 2.85 -sidechain_cutoffs OCO N30 2.85 3.85 -sidechain_cutoffs OCO N31 2.85 3.85 -sidechain_cutoffs OCO N32 2.85 3.85 -sidechain_cutoffs OCO N33 2.85 3.85 -sidechain_cutoffs OCO NAR 2.0 3.0 -sidechain_cutoffs OCO OH 2.65 3.65 -sidechain_cutoffs OCO NAM 2.0 3.0 -# NAR -sidechain_cutoffs NAR AMD 2.0 3.0 -# SH -sidechain_cutoffs SH ROH 3.5 4.5 -sidechain_cutoffs SH TRP 2.5 3.5 -sidechain_cutoffs SH AMD 2.5 3.5 -sidechain_cutoffs SH NAM 2.5 3.5 -sidechain_cutoffs SH CG 2.5 4.0 -sidechain_cutoffs SH C2N 2.5 4.0 -sidechain_cutoffs SH OH 3.5 4.5 -sidechain_cutoffs SH SH 3.0 5.0 - - - -# Maximal interaction energies for side chains -sidechain_interaction 0.85 - -# Angular dependent sidechain interactions -angular_dependent_sidechain_interactions HIS -angular_dependent_sidechain_interactions ARG -angular_dependent_sidechain_interactions AMD -angular_dependent_sidechain_interactions TRP - -# exception interaction values -COO_HIS_exception 1.60 -OCO_HIS_exception 1.60 -CYS_HIS_exception 1.60 -CYS_CYS_exception 3.60 - -# Coulomb interaction parameters -coulomb_cutoff1 4.0 -coulomb_cutoff2 10.0 -coulomb_diel 80.0 - -# Backbone hydrogen bond parameters -backbone_NH_hydrogen_bond COO -0.85 2.00 3.00 -#backbone_NH_hydrogen_bond C- -0.85 2.00 3.00 -backbone_NH_hydrogen_bond CYS -0.85 3.00 4.00 -backbone_NH_hydrogen_bond TYR -0.85 2.20 3.20 -backbone_NH_hydrogen_bond OCO -0.85 2.00 3.50 -backbone_NH_hydrogen_bond NAR -0.85 2.00 3.50 - -backbone_CO_hydrogen_bond HIS 0.85 2.00 3.00 -backbone_CO_hydrogen_bond OCO 0.85 3.00 4.00 -backbone_CO_hydrogen_bond CG 0.85 2.00 4.00 -backbone_CO_hydrogen_bond C2N 0.85 2.00 4.00 -backbone_CO_hydrogen_bond N30 0.85 2.00 4.00 -backbone_CO_hydrogen_bond N31 0.85 2.00 4.00 -backbone_CO_hydrogen_bond N32 0.85 2.00 4.00 -backbone_CO_hydrogen_bond N33 0.85 2.00 4.00 -backbone_CO_hydrogen_bond NAR 0.85 2.00 3.50 - -# Group charges -charge COO -1 -charge HIS +1 -charge CYS -1 -charge TYR -1 -charge LYS +1 -charge ARG +1 -charge N+ +1 -charge C- -1 -charge OCO -1 -charge SER -1 -charge CG +1 -charge C2N +1 -charge N30 +1 -charge N31 +1 -charge N32 +1 -charge N33 +1 -charge NAR +1 -charge SH -1 -charge OP -1 - -# list of acids -acid_list ASP -acid_list GLU -acid_list CYS -acid_list TYR -acid_list SER -acid_list C- -acid_list OCO -acid_list OP -acid_list SH - -# list of bases -base_list ARG -base_list LYS -base_list HIS -base_list N+ -base_list CG -base_list C2N -base_list N30 -base_list N31 -base_list N32 -base_list N33 -base_list NAR - -# list of groups used in backbone reorganisation calculations -backbone_reorganisation_list ASP -backbone_reorganisation_list GLU - -# Residues that should be ignored -ignore_residues HOH -ignore_residues H2O -ignore_residues HOH -ignore_residues SO4 -ignore_residues PO4 -ignore_residues PEG -ignore_residues EPE -#ignore_residues NAG -ignore_residues TRS - -# Relative Van der Waals volume parameters for the radial volume model -# Radii adopted from Bondi, A. (1964). "Van der Waals Volumes and Radii". J. Phys. Chem. 68 (3): 441-51 -VanDerWaalsVolume C 1.40 # radius: 1.70, volume: 20.58 all 'C' and 'CA' atoms -VanDerWaalsVolume C4 2.64 # 38.79 hydrodphobic carbon atoms + unidentified atoms -VanDerWaalsVolume N 1.06 # radius: 1.55, volume: 15.60 all nitrogen atoms -VanDerWaalsVolume O 1.00 # radius: 1.52, volume: 14.71 all oxygen atoms -VanDerWaalsVolume S 1.66 # radius: 1.80, volume: 24.43 all sulphur atoms -VanDerWaalsVolume F 0.90 # raidus: 1.47, volume: 13.30 for fluorine -VanDerWaalsVolume Cl 1.53 # radius: 1.75, volume: 22.44 for chlorine -VanDerWaalsVolume P 1.66 # radius: 1.80, volume: 24.42 for phosphorus - -# Other desolvation parameters -desolvationSurfaceScalingFactor 0.25 -desolvationPrefactor -13.0 -desolvationAllowance 0.0 -desolv_cutoff 20.0 -buried_cutoff 15.0 -Nmin 280 -Nmax 560 - -# Ligand groups -ligand_typing groups -min_bond_distance_for_hydrogen_bonds 4 - -# covalent coupling -coupling_max_number_of_bonds 3 -shared_determinants 0 -common_charge_centre 0 -remove_penalised_group 1 - -# non-covalent coupling -max_intrinsic_pka_diff 2.0 -min_interaction_energy 0.5 -max_free_energy_diff 1.0 -min_swap_pka_shift 1.0 -min_pka 0.0 -max_pka 10.0 -pH variable -reference neutral - -# ions -ions 1P 1 # generic charged atoms -ions 2P 2 -ions 1N -1 -ions 2N -2 - -ions MG 2 #Magnesium Ion -ions CA 2 #Calcium Ion -ions ZN 2 #Zinc Ion -ions NA 1 #Sodium Ion -ions CL -1 #Chloride Ion -ions MN 2 #Manganese (ii) Ion -ions K 1 #Potassium Ion -ions CD 2 #Cadmium Ion -ions FE 3 #Fe (iii) Ion -ions SR 2 #Strontium Ion -ions CU 2 #Copper (ii) Ion -ions IOD -1 #Iodide Ion -ions HG 2 #Mercury (ii) Ion -ions BR -1 #Bromide Ion -ions CO 2 #Cobalt (ii) Ion -ions NI 2 #Nickel (ii) Ion -ions FE2 2 #Fe (ii) Ion - -# write out order of residues -write_out_order ASP -write_out_order GLU -write_out_order C- -write_out_order HIS -write_out_order CYS -write_out_order TYR -write_out_order LYS -write_out_order ARG -write_out_order SER -write_out_order N+ -write_out_order CG -write_out_order C2N -write_out_order N30 -write_out_order N31 -write_out_order N32 -write_out_order N33 -write_out_order NAR -write_out_order OCO -write_out_order SH -write_out_order OP From 32fbb3046412fb13c6ee6f2b42a5f3c3e1da32ef Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 09:58:51 -0700 Subject: [PATCH 25/65] De-lint parameters.py. --- propka/parameters.py | 657 ++++++++++++++++++++++--------------------- 1 file changed, 336 insertions(+), 321 deletions(-) diff --git a/propka/parameters.py b/propka/parameters.py index 90e47b5..3b6a886 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -1,232 +1,262 @@ - -from __future__ import division -from __future__ import print_function - -import math +"""Holds parameters and settings.""" +import pkg_resources import propka.lib as lib -import sys, os from propka.lib import info, warning -import pkg_resources # names and types of all key words in configuration file -matrices = ['interaction_matrix'] - -pair_wise_matrices = ['sidechain_cutoffs'] - -number_dictionaries = ['VanDerWaalsVolume','charge','model_pkas','ions', - 'valence_electrons','custom_model_pkas'] - -list_dictionaries = ['backbone_NH_hydrogen_bond','backbone_CO_hydrogen_bond'] - -string_dictionaries = ['protein_group_mapping'] - -string_lists = ['ignore_residues','angular_dependent_sidechain_interactions', - 'acid_list','base_list','exclude_sidechain_interactions', - 'backbone_reorganisation_list','write_out_order'] - -distances = ['desolv_cutoff','buried_cutoff','coulomb_cutoff1','coulomb_cutoff2'] - -parameters = ['Nmin','Nmax','desolvationSurfaceScalingFactor','desolvationPrefactor', - 'desolvationAllowance','coulomb_diel','COO_HIS_exception','OCO_HIS_exception', - 'CYS_HIS_exception','CYS_CYS_exception','min_ligand_model_pka','max_ligand_model_pka', - 'include_H_in_interactions','coupling_max_number_of_bonds', - 'min_bond_distance_for_hydrogen_bonds','coupling_penalty', - 'shared_determinants','common_charge_centre','hide_penalised_group', 'remove_penalised_group', - 'max_intrinsic_pka_diff','min_interaction_energy','max_free_energy_diff','min_swap_pka_shift', - 'min_pka','max_pka','sidechain_interaction'] - -strings = ['version','output_file_tag','ligand_typing','pH','reference'] - - +MATRICES = ['interaction_matrix'] +PAIR_WISE_MATRICES = ['sidechain_cutoffs'] +NUMBER_DICTIONARIES = ['VanDerWaalsVolume', 'charge', 'model_pkas', 'ions', + 'valence_electrons', 'custom_model_pkas'] +LIST_DICTIONARIES = ['backbone_NH_hydrogen_bond', 'backbone_CO_hydrogen_bond'] +STRING_DICTIONARIES = ['protein_group_mapping'] +STRING_LISTS = ['ignore_residues', 'angular_dependent_sidechain_interactions', + 'acid_list', 'base_list', 'exclude_sidechain_interactions', + 'backbone_reorganisation_list', 'write_out_order'] +DISTANCES = ['desolv_cutoff', 'buried_cutoff', 'coulomb_cutoff1', + 'coulomb_cutoff2'] +PARAMETERS = ['Nmin', 'Nmax', 'desolvationSurfaceScalingFactor', + 'desolvationPrefactor', 'desolvationAllowance', 'coulomb_diel', + 'COO_HIS_exception', 'OCO_HIS_exception', 'CYS_HIS_exception', + 'CYS_CYS_exception', 'min_ligand_model_pka', + 'max_ligand_model_pka', 'include_H_in_interactions', + 'coupling_max_number_of_bonds', + 'min_bond_distance_for_hydrogen_bonds', 'coupling_penalty', + 'shared_determinants', 'common_charge_centre', + 'hide_penalised_group', 'remove_penalised_group', + 'max_intrinsic_pka_diff', 'min_interaction_energy', + 'max_free_energy_diff', 'min_swap_pka_shift', 'min_pka', + 'max_pka', 'sidechain_interaction'] +STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference'] class Parameters: - def __init__(self, parameter_file): + """PROPKA parameter class.""" + def __init__(self, parameter_file): + """Initialize parameter class. + + Args: + parameter_file: file with parameters + """ + # TODO - need to define all members explicitly + self.model_pkas = {} + self.interaction_matrix = InteractionMatrix("interaction_matrix") + self.sidechain_cutoffs = None + # TODO - it would be nice to rename these but they're defined everywhere + self.COO_HIS_exception = None + self.OCO_HIS_exception = None + self.CYS_HIS_exception = None + self.CYS_CYS_exception = None + # These functions set up remaining data structures implicitly self.set_up_data_structures() self.read_parameters(parameter_file) - #self.print_interaction_parameters() - #self.print_interaction_parameters_latex() - #####self.print_interactions_latex() - #sys.exit(0) + def read_parameters(self, file_): + """Read parameters from file. - - return - - - def read_parameters(self, file): + Args: + file_: file to read + """ # try to locate the parameters file try: - ifile = pkg_resources.resource_filename(__name__, file) - input = lib.open_file_for_reading(ifile) - except: - input = lib.open_file_for_reading(file) - - for line in input: + ifile = pkg_resources.resource_filename(__name__, file_) + input_ = lib.open_file_for_reading(ifile) + except (IOError, FileNotFoundError, ValueError): + input_ = lib.open_file_for_reading(file_) + for line in input_: self.parse_line(line) - return - - def parse_line(self, line): + """Parse parameter file line.""" # first, remove comments comment_pos = line.find('#') if comment_pos != -1: line = line[:comment_pos] - # split the line into words words = line.split() if len(words) == 0: return - # parse the words - if len(words)==3 and words[0] in number_dictionaries: + if len(words) == 3 and words[0] in NUMBER_DICTIONARIES: self.parse_to_number_dictionary(words) - elif len(words)==2 and words[0] in string_lists: + elif len(words) == 2 and words[0] in STRING_LISTS: self.parse_to_string_list(words) - elif len(words)==2 and words[0] in distances: + elif len(words) == 2 and words[0] in DISTANCES: self.parse_distance(words) - elif len(words)==2 and words[0] in parameters: + elif len(words) == 2 and words[0] in PARAMETERS: self.parse_parameter(words) - elif len(words)==2 and words[0] in strings: + elif len(words) == 2 and words[0] in STRINGS: self.parse_string(words) - elif len(words)>2 and words[0] in list_dictionaries: + elif len(words) > 2 and words[0] in LIST_DICTIONARIES: self.parse_to_list_dictionary(words) - elif words[0] in matrices+pair_wise_matrices: + elif words[0] in MATRICES+PAIR_WISE_MATRICES: self.parse_to_matrix(words) - elif len(words)==3 and words[0] in string_dictionaries: + elif len(words) == 3 and words[0] in STRING_DICTIONARIES: self.parse_to_string_dictionary(words) - - #info(words) - - return - - def parse_to_number_dictionary(self, words): - exec('self.%s[\'%s\'] = %s'%tuple(words)) - return + """Parse field to number dictionary. + + Args: + words: strings to parse. + """ + dict_ = getattr(self, words[0]) + key = words[1] + value = words[2] + dict_[key] = float(value) def parse_to_string_dictionary(self, words): - exec('self.%s[\'%s\'] = \'%s\''%tuple(words)) - return + """Parse field to string dictionary. + + Args: + words: strings to parse + """ + dict_ = getattr(self, words[0]) + key = words[1] + value = words[2] + dict_[key] = value def parse_to_list_dictionary(self, words): - exec('if not \'%s\' in self.%s.keys(): self.%s[\'%s\'] = []'%(words[1],words[0],words[0],words[1])) - for word in words[2:]: - exec('self.%s[\'%s\'].append(%s)'%(words[0],words[1],word)) + """Parse field to list dictionary. - return + Args: + words: strings to parse. + """ + dict_ = getattr(self, words[0]) + key = words[1] + if not key in dict_: + dict_[key] = [] + for value in words[2:]: + if isinstance(value, list): + dict_[key].append([float(x) for x in value]) + dict_[key].append(float(value)) def parse_to_string_list(self, words): - exec('self.%s.append(\'%s\')'%tuple(words)) - return + """Parse field to string list. + + Args: + words: strings to parse + """ + list_ = getattr(self, words[0]) + value = words[1] + list_.append(value) def parse_to_matrix(self, words): - exec('self.%s.add(%s)'%(words[0],tuple(words[1:]))) - return + """Parse field to matrix. + + Args: + words: strings to parse + """ + matrix = getattr(self, words[0]) + value = tuple(words[1:]) + matrix.add(value) def parse_distance(self, words): - # float check needed - exec('self.%s = %s'%tuple(words)) - exec('self.%s_squared = pow(%s,2)'%tuple(words)) - return + """Parse field to distance. + + Args: + words: strings to parse + """ + value = float(words[1]) + setattr(self, words[0], value) + value_sq = value*value + setattr(self, "%s_squared" % words[0], value_sq) def parse_parameter(self, words): - exec('self.%s = %s'%tuple(words)) - return + """Parse field to parameters. + + Args: + words: strings to parse + """ + value = float(words[1]) + setattr(self, words[0], value) def parse_string(self, words): - #info('self.%s = \'%s\''%tuple(words)) - exec('self.%s = \'%s\''%tuple(words)) - return + """Parse field to strings. + Args: + words: strings to parse + """ + setattr(self, words[0], words[1]) def set_up_data_structures(self): - for key_word in number_dictionaries+list_dictionaries+string_dictionaries: - exec('self.%s = {}'%key_word) - for key_word in string_lists: - exec('self.%s = []'%key_word) - for key_word in strings: - exec('self.%s = \'\''%key_word) - for key_word in matrices: - exec('self.%s = Interaction_matrix(\'%s\')'%(key_word,key_word)) - for key_word in pair_wise_matrices: - exec('self.%s =Pair_wise_matrix(\'%s\')'%(key_word,key_word)) + """Set up internal data structures. - return + TODO - it would be better to make these assignments explicit in __init__. + """ + for key_word in NUMBER_DICTIONARIES + LIST_DICTIONARIES \ + + STRING_DICTIONARIES: + setattr(self, key_word, {}) + for key_word in STRING_LISTS: + setattr(self, key_word, []) + for key_word in STRINGS: + setattr(self, key_word, "") + for key_word in MATRICES: + matrix = InteractionMatrix(key_word) + setattr(self, key_word, matrix) + for key_word in PAIR_WISE_MATRICES: + matrix = PairwiseMatrix(key_word) + setattr(self, key_word, matrix) def print_interaction_parameters(self): + """Print interaction parameters.""" info('--------------- Model pKa values ----------------------') - for k in self.model_pkas.keys(): + for k in self.model_pkas: info('%3s %8.2f' % (k, self.model_pkas[k])) info('') info('--------------- Interactions --------------------------') - agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - - map = { - 'CG' :['ARG'], - 'C2N':['ARG'], - 'N30':['N+','LYS'], - 'N31':['N+','LYS'], - 'N32':['N+','LYS'], - 'N33':['N+','LYS'] , - 'NAR':['HIS'], - 'OCO':['COO'], - 'OP' :[],#['TYR','SER'], - 'SH' :['CYS'] , - 'NP1':[], - 'OH' :['ROH'], - 'O3' :[] , - 'CL' :[], - 'F' :[], - 'NAM':['AMD'], - 'N1' :[], - 'O2' :[]} - - - for g1 in agroups: - for g2 in lgroups: - - interaction = '%3s %3s %1s %4s %4s'%(g1,g2, - self.interaction_matrix[g1][g2], - self.sidechain_cutoffs.get_value(g1,g2)[0], - self.sidechain_cutoffs.get_value(g1,g2)[1]) - + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', + 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', + 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', + 'N1', 'O2', 'OP', 'SH'] + lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', + 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + map_ = {'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'], + 'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'], + 'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'], + 'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [], + 'NAM': ['AMD'], 'N1': [], 'O2': []} + for group1 in agroups: + for group2 in lgroups: + interaction = '%3s %3s %1s %4s %4s' \ + % (group1, group2, self.interaction_matrix[group1][group2], \ + self.sidechain_cutoffs.get_value(group1, group2)[0], \ + self.sidechain_cutoffs.get_value(group1, group2)[1]) map_interaction = '' - if g2 in map: - for m in map[g2]: - map_interaction += '|%3s %3s %1s %4s %4s'%(g1,m, - self.interaction_matrix[g1][m], - self.sidechain_cutoffs.get_value(g1,m)[0], - self.sidechain_cutoffs.get_value(g1,m)[1]) - if self.interaction_matrix[g1][m] != self.interaction_matrix[g1][g2]: + if group2 in map_: + for val in map_[group2]: + map_interaction += '|%3s %3s %1s %4s %4s' \ + % (group1, val, \ + self.interaction_matrix[group1][val], \ + self.sidechain_cutoffs.get_value(group1, val)[0], \ + self.sidechain_cutoffs.get_value(group1, val)[1]) + if self.interaction_matrix[group1][val] \ + != self.interaction_matrix[group1][group2]: map_interaction += '* ' - if self.sidechain_cutoffs.get_value(g1,m)[0] != self.sidechain_cutoffs.get_value(g1,g2)[0] or \ - self.sidechain_cutoffs.get_value(g1,m)[1] != self.sidechain_cutoffs.get_value(g1,g2)[1]: + if self.sidechain_cutoffs.get_value(group1, val)[0] \ + != self.sidechain_cutoffs.get_value(group1, group2)[0] \ + or self.sidechain_cutoffs.get_value(group1, val)[1] \ + != self.sidechain_cutoffs.get_value(group1, group2)[1]: map_interaction += '! ' else: map_interaction += ' ' - if len(map[g2])==0 and (self.sidechain_cutoffs.get_value(g1,g2)[0] !=3 or self.sidechain_cutoffs.get_value(g1,g2)[1] != 4): + if len(map_[group2]) == 0 \ + and (self.sidechain_cutoffs.get_value(group1, group2)[0] \ + != 3 or self.sidechain_cutoffs.get_value(group1, group2)[1] != 4): map_interaction += '? ' - info(interaction, map_interaction) - - if g1==g2: + if group1 == group2: break info('-') - info('--------------- Exceptions ----------------------------') info('COO-HIS', self.COO_HIS_exception) info('OCO-HIS', self.OCO_HIS_exception) info('CYS-HIS', self.CYS_HIS_exception) info('CYS-CYS', self.CYS_CYS_exception) - info('--------------- Mapping -------------------------------') info(""" Titratable: @@ -251,45 +281,19 @@ NAM N1 O2 """) - return - - - - - def print_interaction_parameters_latex(self): -# info('--------------- Model pKa values ----------------------') -# for k in self.model_pkas.keys(): -# info('%3s %8.2f'%(k,self.model_pkas[k])) - -# info('') -# info('--------------- Interactions --------------------------') - agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - - map = { - 'CG' :['ARG'], - 'C2N':['ARG'], - 'N30':['N+','LYS'], - 'N31':['N+','LYS'], - 'N32':['N+','LYS'], - 'N33':['N+','LYS'] , - 'NAR':['HIS'], - 'OCO':['COO'], - 'OP' :[],#['TYR','SER'], - 'SH' :['CYS'] , - 'NP1':['AMD'], - 'OH' :['ROH'], - 'O3' :[] , - 'CL' :[], - 'F' :[], - 'NAM':[], - 'N1' :[], - 'O2' :[]} - - - s = """ + """Print interaction parameters in LaTeX format.""" + # TODO - if these lists and dictionaries are the same as above, then + # should be constants at the level of the module + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', + 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', + 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', + 'N1', 'O2', 'OP', 'SH'] + lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', + 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', + 'SH'] + str_ = """ \\begin{longtable}{lllll} \\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} \\label{tab:ligand_interaction_parameters}\\\\ @@ -312,33 +316,32 @@ Group1 & Group2 & Interaction & c1 &c2 \\\\ \\bottomrule \\endlastfoot -"""%(self.sidechain_cutoffs.default) - for g1 in agroups: - for g2 in lgroups: - if self.interaction_matrix[g1][g2]=='-': +""" % (self.sidechain_cutoffs.default) + for group1 in agroups: + for group2 in lgroups: + if self.interaction_matrix[group1][group2] == '-': continue - if self.sidechain_cutoffs.get_value(g1,g2)==self.sidechain_cutoffs.default: + if self.sidechain_cutoffs.get_value(group1, group2) \ + == self.sidechain_cutoffs.default: continue - - - s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, - self.interaction_matrix[g1][g2], - self.sidechain_cutoffs.get_value(g1,g2)[0], - self.sidechain_cutoffs.get_value(g1,g2)[1]) - - if g1==g2: + str_ += '%3s & %3s & %1s & %4s & %4s\\\\ \n'\ + % (group1, group2, \ + self.interaction_matrix[group1][group2], \ + self.sidechain_cutoffs.get_value(group1, group2)[0], \ + self.sidechain_cutoffs.get_value(group1, group2)[1]) + if group1 == group2: break - - s += ' \\end{longtable}\n' - info(s) - return + str_ += ' \\end{longtable}\n' + info(str_) def print_interactions_latex(self): - agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - - - s = """ + """Print interactions in LaTeX.""" + # TODO - are these the same lists as above? Convert to module constants. + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', + 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', + 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', + 'N1', 'O2', 'OP', 'SH'] + str_ = """ \\begin{longtable}{%s} \\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} \\label{tab:ligand_interaction_parameters}\\\\ @@ -361,174 +364,186 @@ Group1 & Group2 & Interaction & c1 &c2 \\\\ \\bottomrule \\endlastfoot -"""%('l'*len(agroups),self.sidechain_cutoffs.default) - for g1 in agroups: - for g2 in agroups: - - s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, - self.interaction_matrix[g1][g2], - self.sidechain_cutoffs.get_value(g1,g2)[0], - self.sidechain_cutoffs.get_value(g1,g2)[1]) - - if g1==g2: +""" % ('l'*len(agroups), self.sidechain_cutoffs.default) + for group1 in agroups: + for group2 in agroups: + str_ += '%3s & %3s & %1s & %4s & %4s\\\\ \n' \ + % (group1, group2, \ + self.interaction_matrix[group1][group2], \ + self.sidechain_cutoffs.get_value(group1, group2)[0], \ + self.sidechain_cutoffs.get_value(group1, group2)[1]) + if group1 == group2: break - - s += ' \\end{longtable}\n' - info(s) - return + str_ += ' \\end{longtable}\n' + info(str_) +class InteractionMatrix: + """Interaction matrix class.""" - -class Interaction_matrix: def __init__(self, name): + """Initialize with name of matrix. + + Args: + name: name of interaction matrix + """ self.name = name + self.value = None self.ordered_keys = [] self.dictionary = {} - return - def add(self,words): + def add(self, words): + """Add values to matrix. + + Args: + words: values to add + """ new_group = words[0] self.ordered_keys.append(new_group) - if not new_group in self.dictionary.keys(): self.dictionary[new_group] = {} - - for i in range(len(self.ordered_keys)): - group = self.ordered_keys[i] - if len(words)>i+1: + for i, group in enumerate(self.ordered_keys): + if len(words) > i+1: try: - exec('self.value = %s'%words[i+1]) - except: + self.value = float(words[i+1]) + except ValueError: self.value = words[i+1] self.dictionary[group][new_group] = self.value self.dictionary[new_group][group] = self.value - - return - def get_value(self, item1, item2): + """Get specific matrix value. + + Args: + item1: matrix row index + item2: matrix column index + Returns: + matrix value or None + """ try: return self.dictionary[item1][item2] - except: + except KeyError: return None def __getitem__(self, group): + """Get specific group from matrix. + + Args: + group: group to get + """ if group not in self.dictionary.keys(): - raise Exception('%s not found in interaction matrix %s'%(group,self.name)) + str_ = '%s not found in interaction matrix %s' % (group, self.name) + raise KeyError(str_) return self.dictionary[group] - def keys(self): + """Get keys from matrix. + + Returns: + dictionary key list + """ return self.dictionary.keys() def __str__(self): - s = ' ' - for k1 in self.ordered_keys: - s+='%3s '%k1 - s+='\n' - for k1 in self.ordered_keys: - s+='%3s '%k1 - for k2 in self.ordered_keys: - s+='%3s '%self[k1][k2] - s+='\n' - - return s -# ks = ['COO', 'SER', 'ARG', 'LYS', 'HIS', 'AMD', 'CYS', 'TRP','ROH','TYR','N+','CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - -# p = '' -# n=0 -# for i in range(len(ks)): -# for j in range(i,len(ks)): -# if not [0.0,0.0]==self[ks[i]][ks[j]]: -# if not [3.0,4.0]==self[ks[i]][ks[j]]: -# p+='sidechain_cutoff %3s %3s %s\n'%(ks[i],ks[j],self[ks[i]][ks[j]]) -# n+=1 - -# info('total',n,len(ks)) -# return p + str_ = ' ' + for key in self.ordered_keys: + str_ += '%3s ' % key + str_ += '\n' + for key1 in self.ordered_keys: + str_ += '%3s ' % key1 + for key2 in self.ordered_keys: + str_ += '%3s ' % self[key1][key2] + str_ += '\n' + return str_ +class PairwiseMatrix: + """Pairwise interaction matrix class.""" -class Pair_wise_matrix: def __init__(self, name): + """Initialize pairwise matrix. + + Args: + name: name of pairwise interaction + """ self.name = name self.dictionary = {} self.default = [0.0, 0.0] - return - def add(self,words): + def add(self, words): + """Add information to the matrix. + + TODO - this function unnecessarily bundles arguments into a tuple + + Args: + words: tuple with assignment information and value + """ # assign the default value - if len(words)==3 and words[0]=='default': + if len(words) == 3 and words[0] == 'default': self.default = [float(words[1]), float(words[2])] return - # assign non-default values - g1 = words[0] - g2 = words[1] - v = [float(words[2]), float(words[3])] + group1 = words[0] + group2 = words[1] + value = [float(words[2]), float(words[3])] + self.insert(group1, group2, value) + self.insert(group2, group1, value) - self.insert(g1,g2,v) - self.insert(g2,g1,v) + def insert(self, key1, key2, value): + """Insert value into matrix. - return - - def insert(self, k1,k2,v): - - if k1 in self.dictionary.keys() and k2 in self.dictionary[k1].keys(): - if k1!=k2: - warning('Parameter value for %s, %s defined more than once' % (k1, k2)) - - if not k1 in self.dictionary: - self.dictionary[k1] = {} - - self.dictionary[k1][k2] =v - - return + Args: + key1: first matrix key (row) + key2: second matrix key (column) + value: value to insert + """ + if key1 in self.dictionary and key2 in self.dictionary[key1]: + if key1 != key2: + str_ = 'Parameter value for %s, %s defined more than once' \ + % (key1, key2) + warning(str_) + if not key1 in self.dictionary: + self.dictionary[key1] = {} + self.dictionary[key1][key2] = value def get_value(self, item1, item2): + """Get specified value from matrix. + Args: + item1: row index + item2: column index + Returns: + matrix value (or default) + """ try: return self.dictionary[item1][item2] - except: + except KeyError: return self.default def __getitem__(self, group): + """Get item from matrix corresponding to specific group. + + Args: + group: group to retrieve + Returns: + matrix information + """ if group not in self.dictionary.keys(): - raise Exception('%s not found in interaction matrix %s'%(group,self.name)) + str_ = '%s not found in interaction matrix %s' % (group, self.name) + raise KeyError(str_) return self.dictionary[group] - def keys(self): + """Get keys from matrix. + + Returns: + dictionary key list + """ return self.dictionary.keys() def __str__(self): - s='' - for k1 in self.keys(): - for k2 in self[k1].keys(): - s += '%s %s %s\n'%(k1,k2,self[k1][k2]) - - return s - - - - - - - - - - - - - - - - - - - - - - - + str_ = '' + for key1 in self.keys(): + for key2 in self[key1].keys(): + str_ += '%s %s %s\n' % (key1, key2, self[key1][key2]) + return str_ From 47226bb4b01cfc1d394190859d62eb862d28433a Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 11:29:11 -0700 Subject: [PATCH 26/65] Fix deprecated string syntax. --- propka/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/propka/output.py b/propka/output.py index 8419cb6..85f807e 100644 --- a/propka/output.py +++ b/propka/output.py @@ -239,7 +239,7 @@ def get_folding_profile_section(protein, conformation='AVR', str_ += "Could not determine pH values where the free energy" str_ += " is within 80 %s of minimum\n" % ("%") else: - str_ += "The free energy is within 80 \% of maximum" + str_ += "The free energy is within 80 %% of maximum" str_ += " at pH %4.1lf to %4.1lf\n" % (dg_min, dg_max) if ph_min is None or ph_max is None: str_ += "Could not determine the pH-range where the free" From b3e2685e94ee056168ba5e6e8d7402e58fb21b45 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 11:54:41 -0700 Subject: [PATCH 27/65] De-lint propka.py. --- propka/pdb.py | 327 ++++++++++++++++++++++++++------------------------ 1 file changed, 171 insertions(+), 156 deletions(-) diff --git a/propka/pdb.py b/propka/pdb.py index a362189..6d76839 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -1,132 +1,136 @@ - -from __future__ import division -from __future__ import print_function - -import string, sys, copy - +"""PDB parsing functionality.""" import propka.lib -from propka.lib import info, warning - +from propka.lib import warning from propka.atom import Atom from propka.conformation_container import ConformationContainer -expected_atom_numbers = {'ALA':5, - 'ARG':11, - 'ASN':8, - 'ASP':8, - 'CYS':6, - 'GLY':4, - 'GLN':9, - 'GLU':9, - 'HIS':10, - 'ILE':8, - 'LEU':8, - 'LYS':9, - 'MET':8, - 'PHE':11, - 'PRO':7, - 'SER':6, - 'THR':7, - 'TRP':14, - 'TYR':12, - 'VAL':7} + +EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, + 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, + 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, + 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7} def read_pdb(pdb_file, parameters, molecule): - conformations = {} + """Parse a PDB file. + Args: + pdb_file: file to read + parameters: parameters to guide parsing + molecule: molecular container + Returns: + list with elements: + 1. list of conformations + 2. list of names + """ + conformations = {} # read in all atoms in the file - lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains) + lines = get_atom_lines_from_pdb(pdb_file, + ignore_residues=parameters.ignore_residues, + keep_protons=molecule.options.keep_protons, + chains=molecule.options.chains) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer(name=name, + parameters=parameters, + molecular_container=molecule) conformations[name].add_atom(atom) - # make a sorted list of conformation names names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] -def protein_precheck(conformations, names): +def protein_precheck(conformations, names): + """Check protein for correct number of atoms, etc. + + Args: + names: conformation names to check + """ for name in names: atoms = conformations[name].atoms - # Group the atoms by their residue: atoms_by_residue = {} - for a in atoms: - if a.element != 'H': - res_id = resid_from_atom(a) + for atom in atoms: + if atom.element != 'H': + res_id = resid_from_atom(atom) try: - atoms_by_residue[res_id].append(a) + atoms_by_residue[res_id].append(atom) except KeyError: - atoms_by_residue[res_id] = [a] - + atoms_by_residue[res_id] = [atom] for res_id, res_atoms in atoms_by_residue.items(): res_name = res_atoms[0].res_name residue_label = '%3s%5s'%(res_name, res_id) - # ignore ligand residues - if res_name not in expected_atom_numbers: + if res_name not in EXPECTED_ATOM_NUMBERS: continue - # check for c-terminal if 'C-' in [a.terminal for a in res_atoms]: - if len(res_atoms) != expected_atom_numbers[res_name]+1: - warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: + str_ = ("Unexpected number (%d) of atoms in residue %s " + "in conformation %s" % (len(res_atoms), + residue_label, name)) + warning(str_) continue - # check number of atoms in residue - if len(res_atoms) != expected_atom_numbers[res_name]: - warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) - - return - -def resid_from_atom(a): - return '%4d %s %s'%(a.res_num,a.chain_id,a.icode) + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: + str_ = ('Unexpected number (%d) of atoms in residue %s ' + 'in conformation %s' % (len(res_atoms), + residue_label, name)) + warning(str_) -def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, tags = ['ATOM ', 'HETATM'], chains=None): +def resid_from_atom(atom): + """Return string with atom residue information. + Args: + atom: atom to generate string for + Returns + string + """ + return '%4d %s %s' % (atom.res_num, atom.chain_id, atom.icode) + + +def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, + tags=['ATOM ', 'HETATM'], chains=None): + """Get atom lines from PDB file. + + Args: + pdb_file: PDB file to parse + ignore_residues: list of residues to ignore + keep_protons: bool to keep/ignore protons + tags: tags of lines that include atoms + chains: list of chains + """ lines = propka.lib.open_file_for_reading(pdb_file).readlines() nterm_residue = 'next_residue' old_residue = None terminal = None model = 1 - - for line in lines: tag = line[0:6] - # set the model number if tag == 'MODEL ': model = int(line[6:]) nterm_residue = 'next_residue' - if tag == 'TER ': nterm_residue = 'next_residue' - if tag in tags: alt_conf_tag = line[16] - residue_name = line[12:16] - residue_number = line[22:26] - + residue_name = line[12: 16] + residue_number = line[22: 26] # check if we want this residue - if line[17:20] in ignore_residues: + if line[17: 20] in ignore_residues: continue if chains and line[21] not in chains: continue - # set the Nterm residue number - nessecary because we may need to # identify more than one N+ group for structures with alt_conf tags if nterm_residue == 'next_residue' and tag == 'ATOM ': - # make sure that we reached a new residue - nessecary if OXT is not the last atom in - # the previous residue + # make sure that we reached a new residue - nessecary if OXT is + # not the last atom inthe previous residue if old_residue != residue_number: nterm_residue = residue_number old_residue = None - - # Identify the configuration # convert digits to letters if alt_conf_tag in '123456789': @@ -134,106 +138,116 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, if alt_conf_tag == ' ': alt_conf_tag = 'A' conformation = '%d%s'%(model, alt_conf_tag) - # set the terminal if tag == 'ATOM ': - if residue_name.strip() == 'N' and nterm_residue == residue_number: + if (residue_name.strip() == 'N' + and nterm_residue == residue_number): terminal = 'N+' - if residue_name.strip() in ['OXT','O\'\'']: + if residue_name.strip() in ['OXT', 'O\'\'']: terminal = 'C-' nterm_residue = 'next_residue' old_residue = residue_number # and yield the atom atom = Atom(line=line) atom.terminal = terminal - #if keep_protons: - # atom.is_protonated = True if not (atom.element == 'H' and not keep_protons): #ignore hydrogen yield (conformation, atom) - terminal = None - return - def write_pdb(conformation, filename): + """Write PDB conformation to a file. + + Args: + conformation: conformation container + filename: filename for output + """ write_pdb_for_atoms(conformation.atoms, filename) - return + def write_pdb_for_atoms(atoms, filename, make_conect_section=False): - out = propka.lib.open_file_for_writing(filename) + """Write out PDB file for atoms. + Args: + atoms: list of atoms + filename: name of file + make_conect_section: generate a CONECT PDB section + """ + out = propka.lib.open_file_for_writing(filename) for atom in atoms: out.write(atom.make_pdb_line()) - if make_conect_section: for atom in atoms: out.write(atom.make_conect_line()) - - out.close() - return - - def write_mol2_for_atoms(atoms, filename): + """Write out MOL2 file for atoms. + Args: + atoms: list of atoms + filename: name of file + """ + # TODO - header needs to be converted to format string header = '@MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n' - atoms_section = '@ATOM\n' - for i in range(len(atoms)): - atoms_section += atoms[i].make_mol2_line(i+1) - - + for i, atom in enumerate(atoms): + atoms_section += atom.make_mol2_line(i+1) bonds_section = '@BOND\n' - id = 1 - for i in range(len(atoms)): - for j in range(i+1,len(atoms)): - if atoms[i] in atoms[j].bonded_atoms: - type = get_bond_order(atoms[i],atoms[j]) - bonds_section += '%7d %7d %7d %7s\n'%(id, i+1, j+1, type) - id+=1 - + id_ = 1 + for i, atom1 in enumerate(atoms): + for j, atom2 in enumerate(atoms, i+1): + if atom1 in atom2.bonded_atoms: + type_ = get_bond_order(atom1, atom2) + bonds_section += '%7d %7d %7d %7s\n' % (id_, i+1, j+1, type_) + id_ += 1 substructure_section = '@SUBSTRUCTURE\n\n' - if len(atoms)>0: - substructure_section = '@SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].res_num,atoms[0].res_name,atoms[0].numb) - + if len(atoms) > 0: + substructure_section = ('@SUBSTRUCTURE\n%-7d %10s %7d\n' + % (atoms[0].res_num, atoms[0].res_name, + atoms[0].numb)) out = propka.lib.open_file_for_writing(filename) - out.write(header%(len(atoms),id-1)) + out.write(header % (len(atoms), id_-1)) out.write(atoms_section) out.write(bonds_section) out.write(substructure_section) out.close() - return def get_bond_order(atom1, atom2): - type = '1' + """Get the order of a bond between two atoms. + + Args: + atom1: first atom in bond + atom2: second atom in bond + Returns: + string with bond type + """ + type_ = '1' pi_electrons1 = atom1.num_pi_elec_2_3_bonds pi_electrons2 = atom2.num_pi_elec_2_3_bonds - if '.ar' in atom1.sybyl_type: - pi_electrons1 -=1 + pi_electrons1 -= 1 if '.ar' in atom2.sybyl_type: - pi_electrons2 -=1 - + pi_electrons2 -= 1 if pi_electrons1 > 0 and pi_electrons2 > 0: - type = '%d'%(min(pi_electrons1, pi_electrons2)+1) - + type_ = '%d' % (min(pi_electrons1, pi_electrons2)+1) if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: - type = 'ar' - - - return type - + type_ = 'ar' + return type_ def write_input(molecular_container, filename): - out = propka.lib.open_file_for_writing(filename) + """Write PROPKA input file for molecular container. + Args: + molecular_container: molecular container + filename: output file name + """ + out = propka.lib.open_file_for_writing(filename) for conformation_name in molecular_container.conformation_names: - out.write('MODEL %s\n'%conformation_name) + out.write('MODEL %s\n' % conformation_name) # write atoms for atom in molecular_container.conformations[conformation_name].atoms: out.write(atom.make_input_line()) @@ -247,43 +261,51 @@ def write_input(molecular_container, filename): for group in molecular_container.conformations[conformation_name].groups: out.write(group.make_non_covalently_coupled_line()) out.write('ENDMDL\n') - out.close() - return +def read_input(input_file, parameters, molecule): + """Read PROPKA input file for molecular container. -def read_input(input_file, parameters,molecule): + Args: + input_file: input file + parameters: parameters for parsing/setup + molecule: molecular container + Returns: + list with [conformations, names of conformations] + """ conformations = {} - # read in all atoms in the input file lines = get_atom_lines_from_input(input_file) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = ConformationContainer(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer( + name=name, parameters=parameters, + molecular_container=molecule) conformations[name].add_atom(atom) - # make a sorted list of conformation names names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] +def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): + """Get atom lines from a PROPKA input file. -def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): + Args: + input_file: input file + tags: tags defining atom lines + Yields: + conformation container, list of atoms + """ lines = propka.lib.open_file_for_reading(input_file).readlines() conformation = '' - atoms = {} numbers = [] - for line in lines: tag = line[0:6] - # set the conformation if tag == 'MODEL ': conformation = line[6:].strip() - # found an atom - save it if tag in tags: atom = Atom(line=line) @@ -292,46 +314,39 @@ def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): atom.is_protonated = True atoms[atom.numb] = atom numbers.append(atom.numb) - # found bonding information - apply it - if tag == 'CONECT' and len(line)>14: + if tag == 'CONECT' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - b = atoms[int(n)] + for num in conect_numbers[1:]: + bond_atom = atoms[int(num)] # remember to check for cysteine bridges - if center_atom.element == 'S' and b.element == 'S': - center_atom.cysteine_bridge = True - b.cysteine_bridge = True + if center_atom.element == 'S' and bond_atom.element == 'S': + center_atom.cysteine_bridge = True + bond_atom.cysteine_bridge = True # set up bonding - if not b in center_atom.bonded_atoms: - center_atom.bonded_atoms.append(b) - if not center_atom in b.bonded_atoms: - b.bonded_atoms.append(center_atom) - + if not bond_atom in center_atom.bonded_atoms: + center_atom.bonded_atoms.append(bond_atom) + if not center_atom in bond_atom.bonded_atoms: + bond_atom.bonded_atoms.append(center_atom) # found info on covalent coupling - if tag == 'CCOUPL' and len(line)>14: + if tag == 'CCOUPL' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - cg = atoms[int(n)] - center_atom.group.couple_covalently(cg.group) - + for num in conect_numbers[1:]: + cov_atom = atoms[int(num)] + center_atom.group.couple_covalently(cov_atom.group) # found info on non-covalent coupling - if tag == 'NCOUPL' and len(line)>14: + if tag == 'NCOUPL' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - cg = atoms[int(n)] - center_atom.group.couple_non_covalently(cg.group) - + for num in conect_numbers[1:]: + cov_atom = atoms[int(num)] + center_atom.group.couple_non_covalently(cov_atom.group) # this conformation is done - yield the atoms if tag == 'ENDMDL': - for n in numbers: - yield (conformation, atoms[n]) + for num in numbers: + yield (conformation, atoms[num]) # prepare for next conformation atoms = {} numbers = [] - - - return From cd0e9e5c3d27c8206cd6a24b99270bed45d84790 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 13:05:16 -0700 Subject: [PATCH 28/65] De-lint protonate.py. --- propka/protonate.py | 554 +++++++++++++++++++------------------------- 1 file changed, 236 insertions(+), 318 deletions(-) diff --git a/propka/protonate.py b/propka/protonate.py index f6ff7e6..d9c0c09 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -1,441 +1,359 @@ -#!/usr/bin/python +"""Protonate a structure.""" +import math +import propka.bonds +import propka.atom +from propka.vector_algebra import rotate_vector_around_an_axis, vector +from propka.lib import warning, debug -from __future__ import division -from __future__ import print_function - -from propka.vector_algebra import * -import propka.bonds, propka.pdb, propka.atom -from propka.lib import info, warning, debug class Protonate: """ Protonates atoms using VSEPR theory """ def __init__(self, verbose=False): - self.verbose=verbose - - self.valence_electrons = {'H': 1, - 'He':2, - 'Li':1, - 'Be':2, - 'B': 3, - 'C': 4, - 'N': 5, - 'O': 6, - 'F': 7, - 'Ne':8, - 'Na':1, - 'Mg':2, - 'Al':3, - 'Si':4, - 'P': 5, - 'S': 6, - 'Cl':7, - 'Ar':8, - 'K': 1, - 'Ca':2, - 'Sc':2, - 'Ti':2, - 'Va':2, - 'Cr':1, - 'Mn':2, - 'Fe':2, - 'Co':2, - 'Ni':2, - 'Cu':1, - 'Zn':2, - 'Ga':3, - 'Ge':4, - 'As':5, - 'Se':6, - 'Br':7, - 'Kr':8, - 'I':7, - } - - - - - - self.standard_charges= {'ARG-NH1':1.0, - 'ASP-OD2':-1.0, - 'GLU-OE2':-1.0, - 'HIS-ND1':1.0, - 'LYS-NZ':1.0, - 'N+':1.0, - 'C-':-1.0} - - - self.sybyl_charges = {'N.pl3':+1, - 'N.3':+1, - 'N.4':+1, - 'N.ar':+1, - 'O.co2-':-1} - - - self.bond_lengths = {'C':1.09, - 'N':1.01, - 'O':0.96, - 'F':0.92, - 'Cl':1.27, - 'Br':1.41, - 'I':1.61, - 'S':1.35} - - - # protonation_methods[steric_number] = method - self.protonation_methods = {4:self.tetrahedral, - 3:self.trigonal} - - - return - - + """Initialize with flag for verbosity + Args: + verbose: True for verbose output + """ + self.verbose = verbose + self.valence_electrons = {'H': 1, 'He': 2, 'Li': 1, 'Be': 2, 'B': 3, + 'C': 4, 'N': 5, 'O': 6, 'F': 7, 'Ne': 8, + 'Na': 1, 'Mg': 2, 'Al': 3, 'Si': 4, 'P': 5, + 'S': 6, 'Cl': 7, 'Ar': 8, 'K': 1, 'Ca': 2, + 'Sc': 2, 'Ti': 2, 'Va': 2, 'Cr': 1, 'Mn': 2, + 'Fe': 2, 'Co': 2, 'Ni': 2, 'Cu': 1, 'Zn': 2, + 'Ga': 3, 'Ge': 4, 'As': 5, 'Se': 6, 'Br': 7, + 'Kr': 8, 'I': 7} + # TODO - consider putting charges in a configuration file + self.standard_charges = {'ARG-NH1': 1.0, 'ASP-OD2': -1.0, + 'GLU-OE2': -1.0, 'HIS-ND1': 1.0, + 'LYS-NZ': 1.0, 'N+': 1.0, 'C-': -1.0} + self.sybyl_charges = {'N.pl3': 1, 'N.3': 1, 'N.4': 1, 'N.ar': 1, + 'O.co2-': 1} + # TODO - consider putting bond lengths in a configuration file + self.bond_lengths = {'C': 1.09, 'N': 1.01, 'O': 0.96, 'F': 0.92, + 'Cl': 1.27, 'Br': 1.41, 'I': 1.61, 'S': 1.35} + self.protonation_methods = {4: self.tetrahedral, 3: self.trigonal} def protonate(self, molecules): - """ Will protonate all atoms in the molecular container """ + """Protonate all atoms in the molecular container. + Args: + molecules: molecular containers + """ debug('----- Protonation started -----') # Remove all currently present hydrogen atoms self.remove_all_hydrogen_atoms(molecules) - # protonate all atoms for name in molecules.conformation_names: - non_H_atoms = molecules.conformations[name].get_non_hydrogen_atoms() - - for atom in non_H_atoms: + non_h_atoms = (molecules.conformations[name] + .get_non_hydrogen_atoms()) + for atom in non_h_atoms: self.protonate_atom(atom) - # fix hydrogen names - #self.set_proton_names(non_H_atoms) + @staticmethod + def remove_all_hydrogen_atoms(molecular_container): + """Remove all hydrogen atoms from molecule. - return - - - def remove_all_hydrogen_atoms(self, molecular_container): + Args: + molecular_container: molecule to remove hydrogens from + """ for name in molecular_container.conformation_names: - molecular_container.conformations[name].atoms = molecular_container.conformations[name].get_non_hydrogen_atoms() - return - + molecular_container.conformations[name].atoms = ( + molecular_container.conformations[name] + .get_non_hydrogen_atoms()) def set_charge(self, atom): + """Set charge for atom. + + Args: + atom: atom to be charged + """ # atom is a protein atom - if atom.type=='atom': - key = '%3s-%s'%(atom.res_name, atom.name) + if atom.type == 'atom': + key = '%3s-%s' % (atom.res_name, atom.name) if atom.terminal: debug(atom.terminal) - key=atom.terminal - if key in list(self.standard_charges.keys()): + key = atom.terminal + if key in self.standard_charges: atom.charge = self.standard_charges[key] debug('Charge', atom, atom.charge) atom.charge_set = True # atom is a ligand atom - elif atom.type=='hetatm': - if atom.sybyl_type in list(self.sybyl_charges.keys()): + elif atom.type == 'hetatm': + if atom.sybyl_type in self.sybyl_charges: atom.charge = self.sybyl_charges[atom.sybyl_type] - atom.sybyl_type = atom.sybyl_type.replace('-','') + atom.sybyl_type = atom.sybyl_type.replace('-', '') atom.charge_set = True - return - def protonate_atom(self, atom): - if atom.is_protonated: return - if atom.element == 'H': return + """Protonate an atom. + Args: + atom: atom to be protonated + """ + if atom.is_protonated: + return + if atom.element == 'H': + return self.set_charge(atom) self.set_number_of_protons_to_add(atom) self.set_steric_number_and_lone_pairs(atom) self.add_protons(atom) atom.is_protonated = True - return - def set_proton_names(self, heavy_atoms): + @staticmethod + def set_proton_names(heavy_atoms): + """Set names for protons. + Args: + heavy_atoms: list of heavy atoms with protons to be named + """ for heavy_atom in heavy_atoms: i = 1 for bonded in heavy_atom.bonded_atoms: - if bonded.element == 'H': - bonded.name+='%d'%i - i+=1 - - - return - + bonded.name += '%d' % i + i += 1 def set_number_of_protons_to_add(self, atom): - debug('*'*10) - debug('Setting number of protons to add for',atom) - atom.number_of_protons_to_add = 8 - debug(' %4d'%8) - atom.number_of_protons_to_add -= self.valence_electrons[atom.element] - debug('Valence eletrons: %4d'%-self.valence_electrons[atom.element]) - atom.number_of_protons_to_add -= len(atom.bonded_atoms) - debug('Number of bonds: %4d'%- len(atom.bonded_atoms)) - atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds - debug('Pi electrons: %4d'%-atom.num_pi_elec_2_3_bonds) - atom.number_of_protons_to_add += int(atom.charge) - debug('Charge: %4.1f'%atom.charge) + """Set the number of protons to add to this atom. + Args: + atom: atom for calculation + """ + debug('*'*10) + debug('Setting number of protons to add for', atom) + atom.number_of_protons_to_add = 8 + debug(' %4d' % 8) + atom.number_of_protons_to_add -= self.valence_electrons[atom.element] + debug('Valence eletrons: %4d' % -self.valence_electrons[atom.element]) + atom.number_of_protons_to_add -= len(atom.bonded_atoms) + debug('Number of bonds: %4d' % -len(atom.bonded_atoms)) + atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds + debug('Pi electrons: %4d' % -atom.num_pi_elec_2_3_bonds) + atom.number_of_protons_to_add += int(atom.charge) + debug('Charge: %4.1f' % atom.charge) debug('-'*10) debug(atom.number_of_protons_to_add) - return - def set_steric_number_and_lone_pairs(self, atom): + """Set steric number and lone pairs for atom. + Args: + atom: atom for calculation + """ # If we already did this, there is no reason to do it again if atom.steric_num_lone_pairs_set: return - debug('='*10) - debug('Setting steric number and lone pairs for',atom) - - # costumly set the N backbone atoms up for peptide bond trigonal planer shape - #if atom.name == 'N' and len(atom.bonded_atoms) == 2: - # atom.steric_number = 3 - # atom.number_of_lone_pairs = 0 - # self.display 'Peptide bond: steric number is %d and number of lone pairs is %s'%(atom.steric_number, - # atom.number_of_lone_pairs) - # return - - + debug('Setting steric number and lone pairs for', atom) atom.steric_number = 0 - - debug('%65s: %4d'%('Valence electrons',self.valence_electrons[atom.element])) + debug('%65s: %4d' % ('Valence electrons', + self.valence_electrons[atom.element])) atom.steric_number += self.valence_electrons[atom.element] - - debug('%65s: %4d'%('Number of bonds',len(atom.bonded_atoms))) + debug('%65s: %4d' % ('Number of bonds', + len(atom.bonded_atoms))) atom.steric_number += len(atom.bonded_atoms) - - debug('%65s: %4d'%('Number of hydrogen atoms to add',atom.number_of_protons_to_add)) + debug('%65s: %4d' % ('Number of hydrogen atoms to add', + atom.number_of_protons_to_add)) atom.steric_number += atom.number_of_protons_to_add - - debug('%65s: %4d'%('Number of pi-electrons in double and triple bonds(-)',atom.num_pi_elec_2_3_bonds)) + debug('%65s: %4d' % ('Number of pi-electrons in double ' + 'and triple bonds(-)', + atom.num_pi_elec_2_3_bonds)) atom.steric_number -= atom.num_pi_elec_2_3_bonds - - debug('%65s: %4d'%('Number of pi-electrons in conjugated double and triple bonds(-)',atom.num_pi_elec_conj_2_3_bonds)) + debug('%65s: %4d' % ('Number of pi-electrons in conjugated double and ' + 'triple bonds(-)', + atom.num_pi_elec_conj_2_3_bonds)) atom.steric_number -= atom.num_pi_elec_conj_2_3_bonds - - debug('%65s: %4d'%('Number of donated co-ordinated bonds',0)) + debug('%65s: %4d' % ('Number of donated co-ordinated bonds', 0)) atom.steric_number += 0 - - debug('%65s: %4.1f'%('Charge(-)',atom.charge)) + debug('%65s: %4.1f' % ('Charge(-)', atom.charge)) atom.steric_number -= atom.charge - atom.steric_number = math.floor(atom.steric_number/2.0) - - atom.number_of_lone_pairs = atom.steric_number - len(atom.bonded_atoms) - atom.number_of_protons_to_add - + atom.number_of_lone_pairs = (atom.steric_number + - len(atom.bonded_atoms) + - atom.number_of_protons_to_add) debug('-'*70) - debug('%65s: %4d'%('Steric number',atom.steric_number)) - debug('%65s: %4d'%('Number of lone pairs',atom.number_of_lone_pairs)) - + debug('%65s: %4d' % ('Steric number', atom.steric_number)) + debug('%65s: %4d' % ('Number of lone pairs', + atom.number_of_lone_pairs)) atom.steric_num_lone_pairs_set = True - return - - def add_protons(self, atom): + """Add protons to atom. + + Args: + atom: atom for calculation + """ # decide which method to use - debug('PROTONATING',atom) + debug('PROTONATING', atom) if atom.steric_number in list(self.protonation_methods.keys()): self.protonation_methods[atom.steric_number](atom) else: - warning('Do not have a method for protonating', atom, '(steric number: %d)' % atom.steric_number) - - return - + warning('Do not have a method for protonating', + atom, '(steric number: %d)' % atom.steric_number) def trigonal(self, atom): - debug('TRIGONAL - %d bonded atoms'%(len(atom.bonded_atoms))) + """Add hydrogens in trigonal geometry. + + Args: + atom: atom to protonate + """ + debug('TRIGONAL - %d bonded atoms' % len(atom.bonded_atoms)) rot_angle = math.radians(120.0) - - c = vector(atom1 = atom) - + cvec = vector(atom1=atom) # 0 bonds if len(atom.bonded_atoms) == 0: pass - # 1 bond if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first one - a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) + avec = vector(atom1=atom, atom2=atom.bonded_atoms[0]) # use plane of bonded trigonal atom - e.g. arg - self.set_steric_number_and_lone_pairs(atom.bonded_atoms[0]) - if atom.bonded_atoms[0].steric_number == 3 and len(atom.bonded_atoms[0].bonded_atoms)>1: - # use other atoms bonded to the neighbour to establish the plane, if possible + if (atom.bonded_atoms[0].steric_number == 3 + and len(atom.bonded_atoms[0].bonded_atoms) > 1): + # use other atoms bonded to the neighbour to establish the + # plane, if possible other_atom_indices = [] - for i in range(len(atom.bonded_atoms[0].bonded_atoms)): - if atom.bonded_atoms[0].bonded_atoms[i] != atom: + for i, bonded_atom in enumerate(atom.bonded_atoms[0].bonded_atoms): + if bonded_atom != atom: other_atom_indices.append(i) - - - v1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) - v2 = vector(atom1 = atom.bonded_atoms[0], - atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[0]]) - - axis = v1**v2 - - # this is a trick to make sure that the order of atoms doesn't influence - # the final postions of added protons - if len(other_atom_indices)>1: - v3 = vector(atom1 = atom.bonded_atoms[0], - atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[1]]) - - axis2 = v1**v3 - - if axis * axis2>0: + vec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]) + vec2 = vector(atom1=atom.bonded_atoms[0], + atom2=atom.bonded_atoms[0] + .bonded_atoms[other_atom_indices[0]]) + axis = vec1**vec2 + # this is a trick to make sure that the order of atoms doesn't + # influence the final postions of added protons + if len(other_atom_indices) > 1: + vec3 = vector(atom1=atom.bonded_atoms[0], + atom2=atom.bonded_atoms[0] + .bonded_atoms[other_atom_indices[1]]) + axis2 = vec1**vec3 + if axis*axis2 > 0: axis = axis+axis2 else: axis = axis-axis2 - else: - axis = a.orthogonal() - - a = rotate_vector_around_an_axis(rot_angle, axis, a) - a = self.set_bond_distance(a, atom.element) - self.add_proton(atom, c+a) - + axis = avec.orthogonal() + avec = rotate_vector_around_an_axis(rot_angle, axis, avec) + avec = self.set_bond_distance(avec, atom.element) + self.add_proton(atom, cvec+avec) # 2 bonds if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first two - a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) - a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) + avec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) - new_a = -a1 - a2 + new_a = -avec1 - avec2 new_a = self.set_bond_distance(new_a, atom.element) - self.add_proton(atom, c+new_a) - - - return - + self.add_proton(atom, cvec+new_a) def tetrahedral(self, atom): - debug('TETRAHEDRAL - %d bonded atoms'%(len(atom.bonded_atoms))) + """Protonate atom in tetrahedral geometry. + + Args: + atom: atom to protonate. + """ + debug('TETRAHEDRAL - %d bonded atoms' % len(atom.bonded_atoms)) + # TODO - might be good to move tetrahedral angle to constant rot_angle = math.radians(109.5) - - # sanity check - # if atom.number_of_protons_to_add + len(atom.bonded_atoms) != 4: - # self.display 'Error: Attempting tetrahedral structure with %d bonds'%(atom.number_of_protons_to_add + - # len(atom.bonded_atoms)) - - c = vector(atom1 = atom) - + cvec = vector(atom1=atom) # 0 bonds if len(atom.bonded_atoms) == 0: pass - # 1 bond if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first one - a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) - axis = a.orthogonal() - a = rotate_vector_around_an_axis(rot_angle, axis, a) - a = self.set_bond_distance(a, atom.element) - self.add_proton(atom, c+a) - + avec = vector(atom1=atom, atom2=atom.bonded_atoms[0]) + axis = avec.orthogonal() + avec = rotate_vector_around_an_axis(rot_angle, axis, avec) + avec = self.set_bond_distance(avec, atom.element) + self.add_proton(atom, cvec+avec) # 2 bonds if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first two - a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) - a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) - - axis = a1 + a2 - - new_a = rotate_vector_around_an_axis(math.radians(90), axis, -a1) + avec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) + axis = avec1 + avec2 + new_a = rotate_vector_around_an_axis(math.radians(90), axis, + -avec1) new_a = self.set_bond_distance(new_a, atom.element) - self.add_proton(atom, c+new_a) - + self.add_proton(atom, cvec+new_a) # 3 bonds if len(atom.bonded_atoms) == 3 and atom.number_of_protons_to_add > 0: - a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) - a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) - a3 = vector(atom1 = atom, atom2 = atom.bonded_atoms[2]).rescale(1.0) - - new_a = -a1-a2-a3 + avec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) + avec3 = vector(atom1=atom, atom2=atom.bonded_atoms[2]).rescale(1.0) + new_a = -avec1-avec2-avec3 new_a = self.set_bond_distance(new_a, atom.element) - self.add_proton(atom, c+new_a) + self.add_proton(atom, cvec+new_a) - return + @staticmethod + def add_proton(atom, position): + """Add a proton to an atom at a specific position. - - def add_proton(self, atom, position): + Args: + atom: atom to protonate + position: position for proton + """ # Create the new proton - new_H = propka.atom.Atom() - new_H.set_property(numb = None, - name = 'H%s'%atom.name[1:], - res_name = atom.res_name, - chain_id = atom.chain_id, - res_num = atom.res_num, - x = round(position.x,3), # round of to three digimal points - y = round(position.y,3), # to avoid round-off differences - z = round(position.z,3), # when input file - occ = None, - beta = None) - new_H.element = 'H' - new_H.type = atom.type - - new_H.bonded_atoms = [atom] - new_H.charge = 0 - new_H.steric_number = 0 - new_H.number_of_lone_pairs = 0 - new_H.number_of_protons_to_add = 0 - new_H.num_pi_elec_2_3_bonds = 0 - new_H.is_protonates = True - - atom.bonded_atoms.append(new_H) - atom.number_of_protons_to_add -=1 - atom.conformation_container.add_atom(new_H) - + new_h = propka.atom.Atom() + new_h.set_property( + numb=None, + name='H%s' % atom.name[1:], + res_name=atom.res_name, + chain_id=atom.chain_id, + res_num=atom.res_num, + x=round(position.x, 3), # round of to three decimal points to + # avoid round-off differences in input file + y=round(position.y, 3), + z=round(position.z, 3), + occ=None, + beta=None) + new_h.element = 'H' + new_h.type = atom.type + new_h.bonded_atoms = [atom] + new_h.charge = 0 + new_h.steric_number = 0 + new_h.number_of_lone_pairs = 0 + new_h.number_of_protons_to_add = 0 + new_h.num_pi_elec_2_3_bonds = 0 + new_h.is_protonates = True + atom.bonded_atoms.append(new_h) + atom.number_of_protons_to_add -= 1 + atom.conformation_container.add_atom(new_h) # update names of all protons on this atom - new_H.residue_label = "%-3s%4d%2s" % (new_H.name,new_H.res_num, new_H.chain_id) + new_h.residue_label = "%-3s%4d%2s" % (new_h.name, new_h.res_num, + new_h.chain_id) no_protons = atom.count_bonded_elements('H') if no_protons > 1: i = 1 for proton in atom.get_bonded_elements('H'): - proton.name = 'H%s%d'%(atom.name[1:],i) - proton.residue_label = "%-3s%4d%2s" % (proton.name,proton.res_num, proton.chain_id) - i+=1 + proton.name = 'H%s%d' % (atom.name[1:], i) + proton.residue_label = "%-3s%4d%2s" % (proton.name, + proton.res_num, + proton.chain_id) + i += 1 + debug('added', new_h, 'to', atom) + def set_bond_distance(self, bvec, element): + """Set bond distance between atom and element. - debug('added',new_H, 'to',atom) - return - - def set_bond_distance(self, a, element): - d = 1.0 + Args: + bvec: bond vector + element: bonded element + Returns: + scaled bond vector + """ + dist = 1.0 if element in list(self.bond_lengths.keys()): - d = self.bond_lengths[element] + dist = self.bond_lengths[element] else: - warning('Bond length for %s not found, using the standard value of %f' % (element, d)) - - a = a.rescale(d) - - return a - - -if __name__ == '__main__': - import protein, pdb, sys,os - arguments = sys.argv - if len(arguments) != 2: - info('Usage: protonate.py ') - sys.exit(0) - - filename = arguments[1] - if not os.path.isfile(filename): - info('Error: Could not find \"%s\"' % filename) - sys.exit(1) - - - p = Protonate() - pdblist = pdb.readPDB(filename) - my_protein = protein.Protein(pdblist,'test.pdb') - - p.remove_all_hydrogen_atoms_from_protein(my_protein) - my_protein.write_pdb('before_protonation.pdb') - - p.protonate_protein(my_protein) - - ## write out protonated file - my_protein.write_pdb('protonated.pdb') + str_ = ('Bond length for %s not found, using the standard value ' + 'of %f' % (element, dist)) + warning(str_) + bvec = bvec.rescale(dist) + return bvec From 2321a4df8ab686a197ebbb099d6f28c57b35d283 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 13:10:55 -0700 Subject: [PATCH 29/65] De-lint run.py. --- propka/run.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/propka/run.py b/propka/run.py index d5b3cc2..b633326 100644 --- a/propka/run.py +++ b/propka/run.py @@ -1,24 +1,24 @@ -# entry point for propka script +"""Entry point for PROPKA script.""" import logging -import propka.lib, propka.molecular_container +from propka.lib import loadOptions +from propka.molecular_container import Molecular_container _LOGGER = logging.getLogger("PROPKA") -def main(): - """ - Reads in structure files, calculates pKa values, and prints pKa files - """ - # loading options, flaggs and arguments - options = propka.lib.loadOptions() +def main(optargs=None): + """Read in structure files, calculate pKa values, and print pKa files.""" + # loading options, flags and arguments + optargs = optargs if optargs is not None else [] + options = loadOptions(*optargs) pdbfiles = options.filenames - for pdbfile in pdbfiles: - my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) + my_molecule = Molecular_container(pdbfile, options) my_molecule.calculate_pka() my_molecule.write_pka() + def single(pdbfile, optargs=None): """Run a single PROPKA calculation using *pdbfile* as input. @@ -30,12 +30,11 @@ def single(pdbfile, optargs=None): single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v", "--pH=7.2"]) """ optargs = optargs if optargs is not None else [] - options = propka.lib.loadOptions(*optargs) + options = loadOptions(*optargs) pdbfile = options.filenames.pop(0) if len(options.filenames) > 0: _LOGGER.warning("Ignoring filenames: %s", options.filenames) - - my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) + my_molecule = Molecular_container(pdbfile, options) my_molecule.calculate_pka() my_molecule.write_pka() return my_molecule From 6702f967300bb54e2183627f8341d5a5755c0cb5 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 13:53:04 -0700 Subject: [PATCH 30/65] De-lint vector_algebra.py --- propka/ligand.py | 8 +- propka/protonate.py | 30 +-- propka/vector_algebra.py | 442 ++++++++++++++++++++++++--------------- 3 files changed, 287 insertions(+), 193 deletions(-) diff --git a/propka/ligand.py b/propka/ligand.py index e97efc6..2be158d 100644 --- a/propka/ligand.py +++ b/propka/ligand.py @@ -1,6 +1,6 @@ """Ligand classes and functions.""" from propka.calculations import squared_distance -from propka.vector_algebra import vector +from propka.vector_algebra import Vector ALL_SYBYL_TYPES = [ @@ -303,12 +303,12 @@ def are_atoms_planar(atoms): return False if len(atoms) < 4: return False - vec1 = vector(atom1=atoms[0], atom2=atoms[1]) - vec2 = vector(atom1=atoms[0], atom2=atoms[2]) + vec1 = Vector(atom1=atoms[0], atom2=atoms[1]) + vec2 = Vector(atom1=atoms[0], atom2=atoms[2]) norm = (vec1**vec2).rescale(1.0) margin = PLANARITY_MARGIN for atom in atoms[3:]: - vec = vector(atom1=atoms[0], atom2=atom).rescale(1.0) + vec = Vector(atom1=atoms[0], atom2=atom).rescale(1.0) if abs(vec*norm) > margin: return False return True diff --git a/propka/protonate.py b/propka/protonate.py index d9c0c09..e3d96ff 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -2,7 +2,7 @@ import math import propka.bonds import propka.atom -from propka.vector_algebra import rotate_vector_around_an_axis, vector +from propka.vector_algebra import rotate_vector_around_an_axis, Vector from propka.lib import warning, debug @@ -202,14 +202,14 @@ class Protonate: """ debug('TRIGONAL - %d bonded atoms' % len(atom.bonded_atoms)) rot_angle = math.radians(120.0) - cvec = vector(atom1=atom) + cvec = Vector(atom1=atom) # 0 bonds if len(atom.bonded_atoms) == 0: pass # 1 bond if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first one - avec = vector(atom1=atom, atom2=atom.bonded_atoms[0]) + avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0]) # use plane of bonded trigonal atom - e.g. arg self.set_steric_number_and_lone_pairs(atom.bonded_atoms[0]) if (atom.bonded_atoms[0].steric_number == 3 @@ -220,15 +220,15 @@ class Protonate: for i, bonded_atom in enumerate(atom.bonded_atoms[0].bonded_atoms): if bonded_atom != atom: other_atom_indices.append(i) - vec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]) - vec2 = vector(atom1=atom.bonded_atoms[0], + vec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]) + vec2 = Vector(atom1=atom.bonded_atoms[0], atom2=atom.bonded_atoms[0] .bonded_atoms[other_atom_indices[0]]) axis = vec1**vec2 # this is a trick to make sure that the order of atoms doesn't # influence the final postions of added protons if len(other_atom_indices) > 1: - vec3 = vector(atom1=atom.bonded_atoms[0], + vec3 = Vector(atom1=atom.bonded_atoms[0], atom2=atom.bonded_atoms[0] .bonded_atoms[other_atom_indices[1]]) axis2 = vec1**vec3 @@ -244,8 +244,8 @@ class Protonate: # 2 bonds if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first two - avec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) - avec2 = vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) + avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) new_a = -avec1 - avec2 new_a = self.set_bond_distance(new_a, atom.element) @@ -260,14 +260,14 @@ class Protonate: debug('TETRAHEDRAL - %d bonded atoms' % len(atom.bonded_atoms)) # TODO - might be good to move tetrahedral angle to constant rot_angle = math.radians(109.5) - cvec = vector(atom1=atom) + cvec = Vector(atom1=atom) # 0 bonds if len(atom.bonded_atoms) == 0: pass # 1 bond if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first one - avec = vector(atom1=atom, atom2=atom.bonded_atoms[0]) + avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0]) axis = avec.orthogonal() avec = rotate_vector_around_an_axis(rot_angle, axis, avec) avec = self.set_bond_distance(avec, atom.element) @@ -275,8 +275,8 @@ class Protonate: # 2 bonds if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first two - avec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) - avec2 = vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) + avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) axis = avec1 + avec2 new_a = rotate_vector_around_an_axis(math.radians(90), axis, -avec1) @@ -284,9 +284,9 @@ class Protonate: self.add_proton(atom, cvec+new_a) # 3 bonds if len(atom.bonded_atoms) == 3 and atom.number_of_protons_to_add > 0: - avec1 = vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) - avec2 = vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) - avec3 = vector(atom1=atom, atom2=atom.bonded_atoms[2]).rescale(1.0) + avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) + avec3 = Vector(atom1=atom, atom2=atom.bonded_atoms[2]).rescale(1.0) new_a = -avec1-avec2-avec3 new_a = self.set_bond_distance(new_a, atom.element) self.add_proton(atom, cvec+new_a) diff --git a/propka/vector_algebra.py b/propka/vector_algebra.py index 202d9c5..8d874b5 100644 --- a/propka/vector_algebra.py +++ b/propka/vector_algebra.py @@ -1,11 +1,21 @@ -from __future__ import division -from __future__ import print_function +"""Vector algebra for PROPKA.""" import math -from propka.lib import info +from propka.lib import info, get_sorted_configurations -class vector: - """ Vector """ - def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1 = 0, atom2 = 0): + +class Vector: + """Vector""" + + def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1=None, atom2=None): + """Initialize vector. + + Args: + xi: default x-coordinate + yi: default y-coordinate + zi: default z-coordinate + atom1: atom center used to define default coordinate + atom2: two atom centers used to define vector + """ self.x = xi self.y = yi self.z = zi @@ -22,56 +32,53 @@ class vector: self.y = atom2.y - self.y self.z = atom2.z - self.z - return - def __add__(self, other): - return vector(self.x + other.x, + return Vector(self.x + other.x, self.y + other.y, - self.z + other.z) + self.z + other.z) def __sub__(self, other): - return vector(self.x - other.x, + return Vector(self.x - other.x, self.y - other.y, self.z - other.z) - - def __mul__(self, other): - """ Dot product, scalar and matrix multiplication """ - if isinstance(other,vector): + def __mul__(self, other): + """Dot product, scalar and matrix multiplication.""" + if isinstance(other, Vector): return self.x * other.x + self.y * other.y + self.z * other.z - elif isinstance(other, matrix4x4): - return vector( - xi = other.a11*self.x + other.a12*self.y + other.a13*self.z + other.a14*1.0, - yi = other.a21*self.x + other.a22*self.y + other.a23*self.z + other.a24*1.0, - zi = other.a31*self.x + other.a32*self.y + other.a33*self.z + other.a34*1.0 + elif isinstance(other, Matrix4x4): + return Vector( + xi=other.a11*self.x + other.a12*self.y + other.a13*self.z + other.a14*1.0, + yi=other.a21*self.x + other.a22*self.y + other.a23*self.z + other.a24*1.0, + zi=other.a31*self.x + other.a32*self.y + other.a33*self.z + other.a34*1.0 ) elif type(other) in [int, float]: - return vector(self.x * other, self.y * other, self.z * other) + return Vector(self.x * other, self.y * other, self.z * other) else: info('%s not supported' % type(other)) raise TypeError - def __rmul__(self,other): - return self.__mul__(other) - + def __rmul__(self, other): + return self.__mul__(other) def __pow__(self, other): - """ Cross product """ - return vector(self.y * other.z - self.z * other.y, + """Cross product.""" + return Vector(self.y * other.z - self.z * other.y, self.z * other.x - self.x * other.z, self.x * other.y - self.y * other.x) - def __neg__(self): - res = vector(xi = -self.x, - yi = -self.y, - zi = -self.z) + res = Vector(xi=-self.x, + yi=-self.y, + zi=-self.z) return res def sq_length(self): + """Return vector squared-length""" return self.x * self.x + self.y * self.y + self.z * self.z def length(self): + """Return vector length.""" return math.sqrt(self.sq_length()) def __str__(self): @@ -82,164 +89,220 @@ class vector: def orthogonal(self): """ Returns a vector orthogonal to self """ - res = vector(self.y, -self.x, 0) - + res = Vector(self.y, -self.x, 0) if abs(self.y) < abs(self.z): - res = vector(self.z, 0, -self.x) - + res = Vector(self.z, 0, -self.x) return res def rescale(self, new_length): """ Rescale vector to new length while preserving direction """ frac = new_length/(self.length()) - res = vector(xi = self.x*frac, - yi = self.y*frac, - zi = self.z*frac) + res = Vector(xi=self.x*frac, + yi=self.y*frac, + zi=self.z*frac) return res -class matrix4x4: +class Matrix4x4: + """A 4-by-4 matrix class.""" + def __init__(self, a11i=0.0, a12i=0.0, a13i=0.0, a14i=0.0, a21i=0.0, a22i=0.0, a23i=0.0, a24i=0.0, a31i=0.0, a32i=0.0, a33i=0.0, a34i=0.0, a41i=0.0, a42i=0.0, a43i=0.0, a44i=0.0): - + """Initialize with matrix elements.""" + # Row 1 self.a11 = a11i self.a12 = a12i self.a13 = a13i self.a14 = a14i - + # Row 2 self.a21 = a21i self.a22 = a22i self.a23 = a23i self.a24 = a24i - + # Row 3 self.a31 = a31i self.a32 = a32i self.a33 = a33i self.a34 = a34i - + # Row 4 self.a41 = a41i self.a42 = a42i self.a43 = a43i self.a44 = a44i - return + +def angle(avec, bvec): + """Get the angle between two vectors. + + Args: + avec: vector 1 + bvec: vector 2 + Returns: + angle in radians + """ + dot = avec * bvec + return math.acos(dot / (avec.length() * bvec.length())) +def angle_degrees(avec, bvec): + """Get the angle between two vectors in degrees. + + Args: + avec: vector 1 + bvec: vector 2 + Returns: + angle in degrees + """ + return math.degrees(angle(avec, bvec)) +def signed_angle_around_axis(avec, bvec, axis): + """Get signed angle of two vectors around axis in radians. -# methods working on vectors + Args: + avec: vector 1 + bvec: vector 2 + axis: axis + Returns: + angle in radians + """ + norma = avec**axis + normb = bvec**axis + ang = angle(norma, normb) + dot_ = bvec*(avec**axis) + if dot_ < 0: + ang = -ang + return ang -def angle(a, b): - dot = a * b - return math.acos(dot / (a.length() * b.length())) +def rotate_vector_around_an_axis(theta, axis, vec): + """Rotate vector around an axis. - -def angle_degrees(a,b): - return math.degrees(angle(a, b)) - - -def signed_angle_around_axis(a,b, axis): - na = a**axis - nb = b**axis - - v = angle(na,nb) - - d = b*(a**axis) - - if d < 0: - v =-v - - return v - -def signed_angle_degrees(a,b): - return 180/math.pi * signed_angle(a, b) - - -def rotate_vector_around_an_axis(theta, axis, v): - #print "# 1. rotate space about the z-axis so that the rotation axis lies in the xz-plane" - gamma = 0.0 - if axis.y != 0: - if axis.x != 0: - gamma = -axis.x/abs(axis.x)*math.asin(axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y))) - else: - gamma = math.pi/2.0 - - Rz = rotate_atoms_around_z_axis(gamma) - v = Rz * v - axis = Rz * axis - - #print "# 2. rotate space about the y-axis so that the rotation axis lies along the z-axis" - beta = 0.0 + Args: + theta: rotation angle (in radians) + axis: axis for rotation + vec: vector to rotate + Returns: + rotated vector + """ + gamma = 0.0 + if axis.y != 0: if axis.x != 0: - beta = -axis.x/abs(axis.x)*math.acos(axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z)) - Ry = rotate_atoms_around_y_axis(beta) - v = Ry * v - axis = Ry *axis + gamma = -axis.x/abs(axis.x)*math.asin( + axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y))) + else: + gamma = math.pi/2.0 + rot_z = rotate_atoms_around_z_axis(gamma) + vec = rot_z * vec + axis = rot_z * axis + beta = 0.0 + if axis.x != 0: + beta = -axis.x/abs(axis.x)*math.acos( + axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z)) + rot_y = rotate_atoms_around_y_axis(beta) + vec = rot_y * vec + axis = rot_y * axis + rot_z = rotate_atoms_around_z_axis(theta) + vec = rot_z * vec + rot_y = rotate_atoms_around_y_axis(-beta) + vec = rot_y * vec + rot_z = rotate_atoms_around_z_axis(-gamma) + vec = rot_z * vec + return vec - #print "# 3. perform the desired rotation by theta about the z-axis" - Rz = rotate_atoms_around_z_axis(theta) - v = Rz * v - #print "# 4. apply the inverse of step 2." - Ry = rotate_atoms_around_y_axis(-beta) - v = Ry * v - - #print "# 5. apply the inverse of step 1." - Rz = rotate_atoms_around_z_axis(-gamma) - v = Rz * v - - return v +def rotate_atoms_around_z_axis(theta): + """Get rotation matrix for z-axis. -def rotate_atoms_around_z_axis(angle): - Rz = matrix4x4( - a11i = math.cos(angle), a12i = -math.sin(angle), a13i = 0.0, a14i = 0.0, - a21i = math.sin(angle), a22i = math.cos(angle), a23i = 0.0, a24i = 0.0, - a31i = 0.0 , a32i = 0.0 , a33i = 1.0, a34i = 0.0, - a41i = 0.0 , a42i = 0.0 , a43i = 0.0, a44i = 1.0 + Args: + theta: angle of rotation (radians) + Returns: + rotation matrix + """ + return Matrix4x4( + a11i=math.cos(theta), + a12i=-math.sin(theta), + a13i=0.0, + a14i=0.0, + a21i=math.sin(theta), + a22i=math.cos(theta), + a23i=0.0, + a24i=0.0, + a31i=0.0, + a32i=0.0, + a33i=1.0, + a34i=0.0, + a41i=0.0, + a42i=0.0, + a43i=0.0, + a44i=1.0 ) - - return Rz -def rotate_atoms_around_y_axis(angle): - Ry = matrix4x4( - a11i = math.cos(angle), a12i = 0.0, a13i = math.sin(angle), a14i = 0.0, - a21i = 0.0 , a22i = 1.0, a23i = 0.0 , a24i = 0.0, - a31i = -math.sin(angle), a32i = 0.0, a33i = math.cos(angle), a34i = 0.0, - a41i = 0.0 , a42i = 0.0, a43i = 0.0 , a44i = 1.0 +def rotate_atoms_around_y_axis(theta): + """Get rotation matrix for y-axis. + + Args: + theta: angle of rotation (radians) + Returns: + rotation matrix + """ + return Matrix4x4( + a11i=math.cos(theta), + a12i=0.0, + a13i=math.sin(theta), + a14i=0.0, + a21i=0.0, + a22i=1.0, + a23i=0.0, + a24i=0.0, + a31i=-math.sin(theta), + a32i=0.0, + a33i=math.cos(theta), + a34i=0.0, + a41i=0.0, + a42i=0.0, + a43i=0.0, + a44i=1.0 ) - - return Ry +class MultiVector: + """Collection of vectors for multiple configurations of atoms. -class multi_vector: - def __init__(self, atom1=0, atom2=0): + TODO - this class does not appear to be used or covered by tests + """ + + def __init__(self, atom1=None, atom2=None): + """Initialize with atom configurations. + + Args: + atom1: first atom to define vector + atom2: second atom to define vector + """ self.vectors = [] self.keys = [] - + self.result = None # store vectors for all configurations of atoms - if atom1!=0: - self.keys = lib.get_sorted_configurations(atom1.configurations.keys()) - if atom2!=0: - keys2 = lib.get_sorted_configurations(atom2.configurations.keys()) + if atom1 is not None: + self.keys = get_sorted_configurations(atom1.configurations.keys()) + if atom2 is not None: + keys2 = get_sorted_configurations(atom2.configurations.keys()) if self.keys != keys2: - raise 'Cannot make multi vector: Atomic configurations mismatch for\n %s\n %s\n'%(atom1,atom2) + str_ = ('Cannot make multi vector: Atomic configurations ' + 'mismatch for\n %s\n %s\n' % (atom1, atom2)) + raise KeyError(str_) for key in self.keys: atom1.setConfiguration(key) - if atom2!=0: + if atom2 != 0: atom2.setConfiguration(key) - v = vector(atom1=atom1, atom2=atom2) - self.vectors.append(v) - #info(key,v) - return - - def __getattribute__(self,name): + vec = Vector(atom1=atom1, atom2=atom2) + self.vectors.append(vec) + + def __getattribute__(self, name): try: return object.__getattribute__(self, name) except AttributeError: @@ -247,72 +310,103 @@ class multi_vector: def __str__(self): res = '' - for i in range(len(self.keys)): - res += '%s %s\n'%(self.keys[i], self.vectors[i]) + for i, key in enumerate(self.keys): + res += '%s %s\n' % (key, self.vectors[i]) return res - def do_job(self, job): - #info(job) - self.res = multi_vector() - for i in range(len(self.vectors)): - self.res.vectors.append(eval('self.vectors[%d].%s()'%(i,job))) - self.res.keys.append(self.keys[i]) + """Append vectors to configuration. + + Args: + job: name of function to apply to vectors + Returns: + TODO - figure out what this is + """ + self.result = MultiVector() + for i, vector in enumerate(self.vectors): + func = getattr(vector, job) + self.result.vectors.append(func()) + self.result.keys.append(self.keys[i]) return self.get_result + @property def get_result(self): - return self.res - - def generic_operation(self, operation, other): - if self.keys != other.keys: - raise 'Incompatable keys' + """Return the latest result.""" + return self.result - self.res = multi_vector() + def generic_operation(self, operation, other): + """Perform a generic operation between two MultiVector objects. + + Args: + operation: operation to perform (string) + other: other MultiVector object + """ + if self.keys != other.keys: + raise 'Incompatible keys' + self.result = MultiVector() for i in range(len(self.vectors)): - self.res.vectors.append(eval('self.vectors[%d] %s other.vectors[%d]'%(i,operation,i))) - self.res.keys.append(self.keys[i]) - return + self.result.vectors.append( + # TODO - eliminate eval() or entire class + eval('self.vectors[%d] %s other.vectors[%d]' + % (i, operation, i))) + self.result.keys.append(self.keys[i]) def __add__(self, other): - self.generic_operation('+',other) - return self.res + self.generic_operation('+', other) + return self.result def __sub__(self, other): - self.generic_operation('-',other) - return self.res + self.generic_operation('-', other) + return self.result def __mul__(self, other): - self.generic_operation('*',other) - return self.res + self.generic_operation('*', other) + return self.result def __pow__(self, other): - self.generic_operation('**',other) - return self.res + self.generic_operation('**', other) + return self.result - def generic_self_operation(self, operation): + @staticmethod + def generic_self_operation(_): + """TODO - delete this.""" return def __neg__(self): - self.generic_operation('*',-1.0) - return self.res + self.generic_operation('*', -1.0) + return self.result def rescale(self, new_length): - self.res = multi_vector() - for i in range(len(self.vectors)): - self.res.vectors.append(self.vectors[i].rescale(new_length)) - self.res.keys.append(self.keys[i]) + """Rescale multi-vector to new length. + + Args: + new_length: new length for multi-vector + Result: + MultiVector object + """ + self.result = MultiVector() + for i, vector in enumerate(self.vectors): + self.result.vectors.append(vector.rescale(new_length)) + self.result.keys.append(self.keys[i]) return self.res -def rotate_multi_vector_around_an_axis(theta, axis, v): - """ both axis ans v must be multi_vectors """ - - if axis.keys != v.keys: - raise 'Incompatible keys in rotate multi_vector' - - res = multi_vector() - for i in range(len(v.keys)): - res.vectors.append(rotate_vector_around_an_axis(theta, axis.vectors[i], v.vectors[i])) - res.keys.append(v.keys[i]) - +def rotate_multi_vector_around_an_axis(theta, axis, vec): + """Rotate a multi-vector around an axis. + + NOTE - both axis ans v must be MultiVectors. + + Args: + theta: angle (in radians) + axis: multi-vector axis + vec: multi-vector vector + """ + if axis.keys != vec.keys: + raise 'Incompatible keys in rotate MultiVector' + res = MultiVector() + for i, key in enumerate(vec.keys): + res.vectors.append(rotate_vector_around_an_axis(theta, + axis.vectors[i], + vec.vectors[i])) + res.keys.append(key) return res From 2fef500307d1d4fb22a6a49dec5c5eb6afc39baf Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 14:31:17 -0700 Subject: [PATCH 31/65] Restore 3.0 compatibility functions. --- propka/calculations.py | 58 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index c2f15b5..144755e 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -2,7 +2,7 @@ import math import propka.protonate import propka.bonds -from propka.lib import warning +from propka.lib import warning, info # TODO - this file should be broken into three separate files: @@ -96,8 +96,6 @@ def setup_bonding_and_protonation(parameters, molecular_container): def setup_bonding(molecular_container): """Set up bonding for a molecular container. - TODO - figure out why there is a similar function in version.py - Args: molecular_container: the molecular container in question Returns: @@ -108,6 +106,60 @@ def setup_bonding(molecular_container): return my_bond_maker +def setup_bonding_and_protonation_30_style(parameters, molecular_container): + """Set up bonding for a molecular container. + + Args: + parameters: parameters for calculation + molecular_container: the molecular container in question + Returns: + BondMaker object + """ + # Protonate atoms + protonate_30_style(molecular_container) + # make bonds + bond_maker = propka.bonds.BondMaker() + bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) + return bond_maker + + +def protonate_30_style(molecular_container): + """Protonate the molecule. + + Args: + molecular_container: molecule + """ + for name in molecular_container.conformation_names: + info('Now protonating', name) + # split atom into residues + curres = -1000000 + residue = [] + o_atom = None + c_atom = None + for atom in molecular_container.conformations[name].atoms: + if atom.res_num != curres: + curres = atom.res_num + if len(residue) > 0: + #backbone + [o_atom, c_atom] = add_backbone_hydrogen( + residue, o_atom, c_atom) + #arginine + if residue[0].res_name == 'ARG': + add_arg_hydrogen(residue) + #histidine + if residue[0].res_name == 'HIS': + add_his_hydrogen(residue) + #tryptophan + if residue[0].res_name == 'TRP': + add_trp_hydrogen(residue) + #amides + if residue[0].res_name in ['GLN', 'ASN']: + add_amd_hydrogen(residue) + residue = [] + if atom.type == 'atom': + residue.append(atom) + + def set_ligand_atom_names(molecular_container): """Set names for ligands in molecular container. From 98263516bc828f89f3fb0be4b9295b9d970cdf20 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 14:35:02 -0700 Subject: [PATCH 32/65] De-lint version.py --- propka/calculations.py | 2 +- propka/conformation_container.py | 2 +- propka/molecular_container.py | 3 +- propka/propka.cfg | 2 +- propka/version.py | 309 +++++++++++++++++++------------ 5 files changed, 192 insertions(+), 126 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index 144755e..711cd60 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -647,7 +647,7 @@ def hydrogen_bond_interaction(group1, group2, version): # Do nothing, value should have been assigned. pass else: - value = version.calculateSideChainEnergy(dist, dpka_max, cutoff, weight, + value = version.calculate_side_chain_energy(dist, dpka_max, cutoff, weight, f_angle) return value diff --git a/propka/conformation_container.py b/propka/conformation_container.py index 107a0a8..bc0698c 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -195,7 +195,7 @@ class ConformationContainer: # setting ion determinants set_ion_determinants(self, version) # calculating the back-bone reorganization/desolvation term - version.calculatebackbone_reorganization(self) + version.calculate_backbone_reorganization(self) # setting remaining non-iterative and iterative side-chain & Coulomb # interaction determinants set_determinants(self.get_sidechain_groups(), version=version, diff --git a/propka/molecular_container.py b/propka/molecular_container.py index 76768c0..3ef1478 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -48,7 +48,8 @@ class Molecular_container: try: version_class = getattr(propka.version, parameters.version) self.version = version_class(parameters) - except: + except AttributeError as err: + print(err) errstr = 'Error: Version %s does not exist' % parameters.version raise Exception(errstr) # read the input file diff --git a/propka/propka.cfg b/propka/propka.cfg index 58b1aec..c78ea43 100644 --- a/propka/propka.cfg +++ b/propka/propka.cfg @@ -1,6 +1,6 @@ # PropKa configuration file -version version_A +version VersionA # Model pKa values model_pkas C- 3.20 diff --git a/propka/version.py b/propka/version.py index 86c4268..b994cbc 100644 --- a/propka/version.py +++ b/propka/version.py @@ -1,216 +1,281 @@ -from __future__ import division -from __future__ import print_function -import math -import sys, os +"""Contains version-specific methods and parameters. -import propka.lib as lib -from propka.lib import info, warning -import propka.calculations as calculations -import propka.parameters +TODO - this module unnecessarily confuses the code. Can we eliminate it? +""" +from propka.lib import info +import propka.calculations as calcs -class version: - def __init__(self,parameters): +class Version: + """Store version-specific methods and parameters.""" + def __init__(self, parameters): self.parameters = parameters - return + self.desolvation_model = self.empty_function + self.weight_pair_method = self.empty_function + self.hydrogen_bond_interaction_model = self.empty_function + self.sidechain_interaction_model = self.empty_function + self.electrostatic_interaction_model = self.empty_function + self.coulomb_interaction_model = self.empty_function + self.check_coulomb_pair_method = self.empty_function + self.backbone_reorganisation_method = self.empty_function + self.exception_check_method = self.empty_function + self.molecular_preparation_method = self.empty_function + self.prepare_bonds = self.empty_function + + @staticmethod + def empty_function(*args): + """Placeholder function so we don't use uninitialized variables. + + Args: + args: whatever arguments would have been passed to the function + Raises: + NotImplementedError + """ + err = "Called an empty Version function with args %s" % args + raise NotImplementedError(err) - # desolvation def calculate_desolvation(self, group): + """Calculate desolvation energy using assigned model.""" return self.desolvation_model(self.parameters, group) def calculate_pair_weight(self, num_volume1, num_volume2): + """Calculate pair weight using assigned model.""" return self.weight_pair_method(self.parameters, num_volume1, num_volume2) - # side chains def hydrogen_bond_interaction(self, group1, group2): + """Calculate H-bond energy using assigned model.""" return self.hydrogen_bond_interaction_model(group1, group2, self) - def calculateSideChainEnergy(self, distance, dpka_max, cutoff, weight, f_angle): - return self.sidechain_interaction_model(distance, dpka_max, cutoff, f_angle) # weight is ignored in 3.0 Sep07 + def calculate_side_chain_energy(self, distance, dpka_max, cutoff, _, f_angle): + """Calculate sidechain energy using assigned model.""" + return self.sidechain_interaction_model(distance, dpka_max, cutoff, f_angle) - # coulomb def electrostatic_interaction(self, group1, group2, distance): + """Calculate electrostatic energy using assigned model.""" return self.electrostatic_interaction_model(group1, group2, distance, self) def calculate_coulomb_energy(self, distance, weight): + """Calculate Coulomb energy using assigned model.""" return self.coulomb_interaction_model(distance, weight, self.parameters) def check_coulomb_pair(self, group1, group2, distance): + """Check Coulomb pair using assigned model.""" return self.check_coulomb_pair_method(self.parameters, group1, group2, distance) - # backbone re-organisation - def calculatebackbone_reorganization(self, conformation): + def calculate_backbone_reorganization(self, conformation): + """Calculate backbone reorganization using assigned model.""" return self.backbone_reorganisation_method(self.parameters, conformation) - # exceptions def check_exceptions(self, group1, group2): + """Calculate exceptions using assigned model.""" return self.exception_check_method(self, group1, group2) def setup_bonding_and_protonation(self, molecular_container): + """Setup bonding and protonation using assigned model.""" return self.molecular_preparation_method(self.parameters, molecular_container) def setup_bonding(self, molecular_container): + """Setup bonding using assigned model.""" return self.prepare_bonds(self.parameters, molecular_container) +class VersionA(Version): + """TODO - figure out what this is.""" -class version_A(version): def __init__(self, parameters): + """Initialize object with parameters.""" # set the calculation rutines used in this version - version.__init__(self, parameters) - - # atom naming, bonding, and protonation - self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation - self.prepare_bonds = propka.calculations.setup_bonding - - - # desolvation related methods - self.desolvation_model = calculations.radial_volume_desolvation - self.weight_pair_method = calculations.calculate_pair_weight - - # side chain methods - self.sidechain_interaction_model = propka.calculations.hydrogen_bond_energy - self.hydrogen_bond_interaction_model = propka.calculations.hydrogen_bond_interaction - - # colomb methods - self.electrostatic_interaction_model = propka.calculations.electrostatic_interaction - self.check_coulomb_pair_method = propka.calculations.check_coulomb_pair - self.coulomb_interaction_model = propka.calculations.coulomb_energy - - #backbone - self.backbone_interaction_model = propka.calculations.hydrogen_bond_energy - self.backbone_reorganisation_method = propka.calculations.backbone_reorganization - - # exception methods - self.exception_check_method = propka.calculations.check_exceptions - return + super().__init__(parameters) + self.molecular_preparation_method = calcs.setup_bonding_and_protonation + self.prepare_bonds = calcs.setup_bonding + self.desolvation_model = calcs.radial_volume_desolvation + self.weight_pair_method = calcs.calculate_pair_weight + self.sidechain_interaction_model = calcs.hydrogen_bond_energy + self.hydrogen_bond_interaction_model = calcs.hydrogen_bond_interaction + self.electrostatic_interaction_model = calcs.electrostatic_interaction + self.check_coulomb_pair_method = calcs.check_coulomb_pair + self.coulomb_interaction_model = calcs.coulomb_energy + self.backbone_interaction_model = calcs.hydrogen_bond_energy + self.backbone_reorganisation_method = calcs.backbone_reorganization + self.exception_check_method = calcs.check_exceptions def get_hydrogen_bond_parameters(self, atom1, atom2): + """Get hydrogen bond parameters for two atoms. + + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ dpka_max = self.parameters.sidechain_interaction - cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value( + atom1.group_type, atom2.group_type) return [dpka_max, cutoff] def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): + """Get hydrogen bond parameters between backbone atom and other atom. + + Args: + backbone_atom: backbone atom + atom: other atom + Returns + [v, [c1, c3]] TODO - figure out what this is + """ if backbone_atom.group_type == 'BBC': if atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] - + [v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] if backbone_atom.group_type == 'BBN': if atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] - + [v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] return None +class SimpleHB(VersionA): + """A simple hydrogen bond version.""" - -class simple_hb(version_A): def __init__(self, parameters): + """Initialize object with parameters.""" # set the calculation rutines used in this version - version_A.__init__(self, parameters) + super().__init__(parameters) info('Using simple hb model') - return def get_hydrogen_bond_parameters(self, atom1, atom2): - return self.parameters.hydrogen_bonds.get_value(atom1.element, atom2.element) + """Get hydrogen bond parameters for two atoms. + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ + return self.parameters.hydrogen_bonds.get_value( + atom1.element, atom2.element) def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): - return self.parameters.hydrogen_bonds.get_value(backbone_atom.element, atom.element) + """Get hydrogen bond parameters between backbone atom and other atom. + + Args: + backbone_atom: backbone atom + atom: other atom + Returns + [v, [c1, c3]] TODO - figure out what this is + """ + return self.parameters.hydrogen_bonds.get_value( + backbone_atom.element, atom.element) +class ElementBasedLigandInteractions(VersionA): + """TODO - figure out what this is.""" - -class element_based_ligand_interactions(version_A): def __init__(self, parameters): + """Initialize object with parameters.""" # set the calculation rutines used in this version - version_A.__init__(self, parameters) + super().__init__(parameters) info('Using detailed SC model!') return def get_hydrogen_bond_parameters(self, atom1, atom2): - if not 'hetatm' in [atom1.type, atom2.type]: - # this is a protein-protein interaction - dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type) - cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) - return [dpka_max, cutoff] + """Get hydrogen bond parameters for two atoms. + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ + if 'hetatm' not in [atom1.type, atom2.type]: + # this is a protein-protein interaction + dpka_max = self.parameters.sidechain_interaction.get_value( + atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value( + atom1.group_type, atom2.group_type) + return [dpka_max, cutoff] # at least one ligand atom is involved in this interaction # make sure that we are using the heavy atoms for finding paramters elements = [] - for a in [atom1, atom2]: - if a.element == 'H': elements.append(a.bonded_atoms[0].element) - else: elements.append(a.element) - - return self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) - + for atom in [atom1, atom2]: + if atom.element == 'H': + elements.append(atom.bonded_atoms[0].element) + else: + elements.append(atom.element) + return self.parameters.hydrogen_bonds.get_value( + elements[0], elements[1]) def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): + """Get hydrogen bond parameters between backbone atom and other atom. + + Args: + backbone_atom: backbone atom + atom: other atom + Returns + [v, [c1, c3]] TODO - figure out what this is + """ if atom.type == 'atom': # this is a backbone-protein interaction - if backbone_atom.group_type == 'BBC' and\ - atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] + if (backbone_atom.group_type == 'BBC' + and atom.group_type + in self.parameters.backbone_CO_hydrogen_bond.keys()): + [v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] - if backbone_atom.group_type == 'BBN' and\ - atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] + if (backbone_atom.group_type == 'BBN' + and atom.group_type + in self.parameters.backbone_NH_hydrogen_bond.keys()): + [v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] else: # this is a backbone-ligand interaction # make sure that we are using the heavy atoms for finding paramters elements = [] - for a in [backbone_atom, atom]: - if a.element == 'H': elements.append(a.bonded_atoms[0].element) - else: elements.append(a.element) - - res = self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) + for atom2 in [backbone_atom, atom]: + if atom2.element == 'H': + elements.append(atom2.bonded_atoms[0].element) + else: + elements.append(atom2.element) + res = self.parameters.hydrogen_bonds.get_value( + elements[0], elements[1]) if not res: info('Could not determine backbone interaction parameters for:', backbone_atom, atom) - - return - + return None return None +class Propka30(Version): + """Version class for PROPKA 3.0.""" -class propka30(version): def __init__(self, parameters): - # set the calculation rutines used in this version - version.__init__(self, parameters) - - # atom naming, bonding, and protonation - self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation_30_style - - # desolvation related methods - self.desolvation_model = calculations.radial_volume_desolvation - self.weight_pair_method = calculations.calculate_pair_weight - - # side chain methods - self.sidechain_interaction_model = propka.calculations.hydrogen_bond_energy - - # colomb methods - self.check_coulomb_pair_method = propka.calculations.check_coulomb_pair - self.coulomb_interaction_model = propka.calculations.coulomb_energy - - #backbone - self.backbone_reorganisation_method = propka.calculations.backbone_reorganization - - # exception methods - self.exception_check_method = propka.calculations.check_exceptions - - - return + """Initialize object with parameters.""" + # set the calculation routines used in this version + super().__init__(parameters) + self.molecular_preparation_method = ( + calcs.setup_bonding_and_protonation_30_style) + self.desolvation_model = calcs.radial_volume_desolvation + self.weight_pair_method = calcs.calculate_pair_weight + self.sidechain_interaction_model = calcs.hydrogen_bond_energy + self.check_coulomb_pair_method = calcs.check_coulomb_pair + self.coulomb_interaction_model = calcs.coulomb_energy + self.backbone_reorganisation_method = calcs.backbone_reorganization + self.exception_check_method = calcs.check_exceptions def get_hydrogen_bond_parameters(self, atom1, atom2): - dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type) - cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) + """Get hydrogen bond parameters for two atoms. + + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ + dpka_max = self.parameters.sidechain_interaction.get_value( + atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value( + atom1.group_type, atom2.group_type) return [dpka_max, cutoff] - - - - From d30705d4909c36b8aa5923fb994dab4d329d62f6 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 14:38:20 -0700 Subject: [PATCH 33/65] De-lint propka31.py. --- scripts/propka31.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/scripts/propka31.py b/scripts/propka31.py index 3b8c81e..d3459c8 100755 --- a/scripts/propka31.py +++ b/scripts/propka31.py @@ -1,32 +1,30 @@ #!/usr/bin/env python +"""PROPKA script. -# This is the original propka script. However, this distribute-based -# installation moved the main() function into propka.run.main and just -# generates a script called propka31 from the setup.py installation -# script. You should not need to use this script. -# -# (Also note that there can be import problems because the script name -# is the same as the module name; that's why the new script is called -# propka31.) +This is the original propka script. However, this distribute-based +installation moved the main() function into propka.run.main and just +generates a script called propka31 from the setup.py installation +script. You should not need to use this script. + +(Also note that there can be import problems because the script name +is the same as the module name; that's why the new script is called +propka31.) +""" +from propka.lib import loadOptions +from propka.molecular_container import Molecular_container -import propka.lib, propka.molecular_container def main(): - """ - Reads in structure files, calculates pKa values, and prints pKa files - """ + """Read in structure files, calculates pKa values, and prints pKa files.""" # loading options, flaggs and arguments - options = propka.lib.loadOptions() + options = loadOptions([]) pdbfiles = options.filenames for pdbfile in pdbfiles: - my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) + my_molecule = Molecular_container(pdbfile, options) my_molecule.calculate_pka() my_molecule.write_pka() if __name__ == '__main__': - #import cProfile - #cProfile.run('main()',sort=1) main() - From 75cdf0ea041cf4d859a08bb0cc2c7ad10c0b8a12 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 14:44:53 -0700 Subject: [PATCH 34/65] De-lint tests. --- tests/test_basic_regression.py | 11 ++++++----- tests/test_hybrid36.py | 25 ++++++++++++++----------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/test_basic_regression.py b/tests/test_basic_regression.py index 7e2a687..60a72ba 100644 --- a/tests/test_basic_regression.py +++ b/tests/test_basic_regression.py @@ -100,8 +100,8 @@ def compare_output(pdb, tmp_path, ref_path): elif line.startswith("---"): at_pka = False else: - m = re.search(r'([0-9]+\.[0-9]+)', line) - value = float(m.group(0)) + match = re.search(r'([0-9]+\.[0-9]+)', line) + value = float(match.group(0)) test_data.append(value) errstr = "Error exceeds maximum allowed value (%d decimal places)" % MAX_ERR_DECIMALS assert_almost_equal(test_data, ref_data, decimal=MAX_ERR_DECIMALS, @@ -113,9 +113,10 @@ def compare_output(pdb, tmp_path, ref_path): pytest.param('1HPX', [], id="1HPX: no options"), pytest.param('4DFR', [], id="4DFR: no options"), pytest.param('3SGB', [], id="3SGB: no options"), - pytest.param('3SGB-subset', ["--titrate_only", - "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], - id="3SGB: --titrate_only"), + pytest.param('3SGB-subset', [ + "--titrate_only", + "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], + id="3SGB: --titrate_only"), pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")]) def test_regression(pdb, options, tmp_path): """Basic regression test of PROPKA functionality.""" diff --git a/tests/test_hybrid36.py b/tests/test_hybrid36.py index 1af74d0..261db2a 100644 --- a/tests/test_hybrid36.py +++ b/tests/test_hybrid36.py @@ -1,9 +1,13 @@ +"""Test the hybrid36 module.""" import unittest - import propka.hybrid36 as hybrid36 + class Hybrid36Test(unittest.TestCase): - def testDecode(self): + """Test class for hybrid36.""" + + def test_decode(self): + """Test decoding functions.""" test_values = { "99999": 99999, "A0000": 100000, @@ -37,11 +41,11 @@ class Hybrid36Test(unittest.TestCase): "A001Z": 100071, "B0000": 1779616, } + for key, value in test_values.items(): + self.assertEqual(hybrid36.decode(key), value) - for k, v in test_values.items(): - self.assertEqual(hybrid36.decode(k), v) - - def testErrors(self): + def test_errors(self): + """Test values that should raise errors.""" test_values = [ "99X99", "X9-99", @@ -50,8 +54,7 @@ class Hybrid36Test(unittest.TestCase): "-", "!NotOk", ] - - for v in test_values: - with self.assertRaises(ValueError) as e: - hybrid36.decode(v) - self.assertTrue(v in str(e.exception)) \ No newline at end of file + for value in test_values: + with self.assertRaises(ValueError) as err: + hybrid36.decode(value) + self.assertTrue(value in str(err.exception)) From 4408b23448f897fef4c26734cf1ec5bd5487fc66 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 16:42:09 -0700 Subject: [PATCH 35/65] Clean up line breaks. For https://github.com/jensengroup/propka-3.1/issues/43 --- propka/atom.py | 55 +++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index 37b6da6..b66587c 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -8,12 +8,12 @@ from . import hybrid36 class Atom(object): """Atom class - contains all atom information found in the PDB file""" - def __init__(self, line=None, verbose=False): + def __init__(self, line=None, _=False): """Initialize Atom object. Args: line: Line from a PDB file to set properties of atom. - verbose: TODO - this does not appear to be used. Can we remove it? + _: TODO - this does not appear to be used. Can we remove it? """ self.occ = None self.numb = None @@ -48,7 +48,8 @@ class Atom(object): self.num_pi_elec_conj_2_3_bonds = 0 self.groups_extracted = 0 self.set_properties(line) - self.residue_label = "%-3s%4d%2s" % (self.name, self.res_num, self.chain_id) + self.residue_label = "%-3s%4d%2s" % ( + self.name, self.res_num, self.chain_id) # ligand atom types self.sybyl_type = '' @@ -101,7 +102,8 @@ class Atom(object): if len(self.name) == 4: self.element = self.element[0] if len(self.element) == 2: - self.element = '%1s%1s' % (self.element[0], self.element[1].lower()) + self.element = '%1s%1s' % ( + self.element[0], self.element[1].lower()) def set_group_type(self, type_): """Set group type of atom. @@ -156,12 +158,14 @@ class Atom(object): if ba == other_atom: return True if max_bonds > cur_bond: - if ba.is_atom_within_bond_distance(other_atom, max_bonds, cur_bond+1): + if ba.is_atom_within_bond_distance(other_atom, max_bonds, + cur_bond+1): return True return False def set_property(self, numb=None, name=None, res_name=None, chain_id=None, - res_num=None, x=None, y=None, z=None, occ=None, beta=None): + res_num=None, x=None, y=None, z=None, occ=None, + beta=None): """Set properties of the atom object. Args: @@ -225,7 +229,8 @@ class Atom(object): """PDB line for this atom. TODO - Could be @property method/attribute - TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 + TODO - figure out difference between make_pdb_line, make_input_line, + and make_pdb_line2 Returns: String with PDB-format line. @@ -239,11 +244,12 @@ class Atom(object): if self.group.titratable: model_pka = '%6.2f'%self.group.model_pka - str_ = "%-6s%5d %s " % (self.type.upper(), self.numb, - propka.lib.make_tidy_atom_label(self.name, self.element)) - str_ += "%s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % (self.res_name, self.chain_id, - self.res_num, self.x, self.y, - self.z, group, model_pka) + str_ = "%-6s%5d %s " % ( + self.type.upper(), self.numb, + propka.lib.make_tidy_atom_label(self.name, self.element)) + str_ += "%s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % ( + self.res_name, self.chain_id, self.res_num, self.x, self.y, + self.z, group, model_pka) return str_ def make_conect_line(self): @@ -294,7 +300,9 @@ class Atom(object): group_attr = getattr(propka.group, group_attr) self.group = group_attr(self) except: - raise Exception('%s in input_file is not recognized as a group' % self.occ) + # TODO - be more specific with expection handling here + str_ = '%s in input_file is not recognized as a group' % self.occ + raise Exception(str_) # set the model pKa value if self.beta != '-': self.group.model_pka = float(self.beta) @@ -307,7 +315,8 @@ class Atom(object): """Create PDB line. TODO - this could/should be a @property method/attribute - TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 + TODO - figure out difference between make_pdb_line, make_input_line, + and make_pdb_line2 Returns: String with PDB line. @@ -330,11 +339,11 @@ class Atom(object): Returns: String with MOL2 line. """ - str_ = "%-4d %-4s " % (id_, propka.lib.make_tidy_atom_label(self.name, - self.element)) + str_ = "%-4d %-4s " % ( + id_, propka.lib.make_tidy_atom_label(self.name, self.element)) str_ += "%10.4f %10.4f %10.4f " % (self.x, self.y, self.z) - str_ += "%6s %6d %10s %10.4f\n" % (self.sybyl_type.replace('-', ''), - self.res_num, self.res_name, 0.0) + str_ += "%6s %6d %10s %10.4f\n" % ( + self.sybyl_type.replace('-', ''), self.res_num, self.res_name, 0.0) return str_ def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None, @@ -342,7 +351,8 @@ class Atom(object): """Create a PDB line. TODO - this could/should be a @property method/attribute - TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 + TODO - figure out difference between make_pdb_line, make_input_line, + and make_pdb_line2 Returns: String with PDB line. @@ -392,10 +402,9 @@ class Atom(object): def __str__(self): """Return an undefined-format string version of this atom.""" - return '%5d-%4s %5d-%3s (%1s) [%8.3f %8.3f %8.3f] %s' % (self.numb, self.name, - self.res_num, self.res_name, - self.chain_id, self.x, self.y, - self.z, self.element) + return '%5d-%4s %5d-%3s (%1s) [%8.3f %8.3f %8.3f] %s' % ( + self.numb, self.name, self.res_num, self.res_name, self.chain_id, + self.x, self.y, self.z, self.element) def set_residue(self, residue): """ Makes a reference to the parent residue From 142fa5a0047ed2cb00ab7cc614b09db46d85d7cf Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 17:18:57 -0700 Subject: [PATCH 36/65] Clean up line breaks in bonds.py. See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/bonds.py | 106 ++++++++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/propka/bonds.py b/propka/bonds.py index 367c6d1..0144f78 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -24,18 +24,23 @@ class BondMaker: """ def __init__(self): # predefined bonding distances - self.distances = {'S-S' : DISULFIDE_DISTANCE, 'F-F' : FLUORIDE_DISTANCE} + self.distances = {'S-S' : DISULFIDE_DISTANCE, + 'F-F' : FLUORIDE_DISTANCE} self.distances_squared = {} for key in self.distances: - self.distances_squared[key] = self.distances[key] * self.distances[key] + self.distances_squared[key] = ( + self.distances[key] * self.distances[key]) h_dist = HYDROGEN_DISTANCE self.default_dist = DEFAULT_DISTANCE self.h_dist_squared = h_dist * h_dist self.default_dist_squared = self.default_dist * self.default_dist - distances = list(self.distances_squared.values()) + [self.default_dist_squared] + distances = ( + list(self.distances_squared.values()) + + [self.default_dist_squared]) self.max_sq_distance = max(distances) # protein bonding data - self.data_file_name = pkg_resources.resource_filename(__name__, 'protein_bonds.json') + self.data_file_name = ( + pkg_resources.resource_filename(__name__, 'protein_bonds.json')) with open(self.data_file_name, 'rt') as json_file: self.protein_bonds = json.load(json_file) self.intra_residue_backbone_bonds = {'N': ['CA'], 'CA': ['N', 'C'], @@ -74,7 +79,7 @@ class BondMaker: self.num_box_z = None def find_bonds_for_protein(self, protein): - """Finds bonds proteins based on the way atoms normally bond in proteins. + """Bonds proteins based on the way atoms normally bond. Args: protein: the protein to search for bonds @@ -90,9 +95,12 @@ class BondMaker: last_residues = [] for chain in protein.chains: for i in range(1, len(chain.residues)): - if chain.residues[i-1].res_name.replace(' ', '') not in ['N+', 'C-']: - if chain.residues[i].res_name.replace(' ', '') not in ['N+', 'C-']: - self.connect_backbone(chain.residues[i-1], chain.residues[i]) + if (chain.residues[i-1].res_name.replace(' ', '') + not in ['N+', 'C-']): + if (chain.residues[i].res_name.replace(' ', '') + not in ['N+', 'C-']): + self.connect_backbone(chain.residues[i-1], + chain.residues[i]) last_residues.append(chain.residues[i]) info('++++ terminal oxygen ++++') # terminal OXT @@ -119,7 +127,8 @@ class BondMaker: if atom1.name == 'SG': for atom2 in cys2.atoms: if atom2.name == 'SG': - dist = propka.calculations.squared_distance(atom1, atom2) + dist = propka.calculations.squared_distance(atom1, + atom2) # TODO - is SS_dist_squared an attribute of this class? if dist < self.SS_dist_squared: self.make_bond(atom1, atom2) @@ -149,8 +158,8 @@ class BondMaker: if atom1.name == 'C': for atom2 in residue2.atoms: if atom2.name == 'N': - if propka.calculations.squared_distance(atom1, atom2) \ - < self.default_dist_squared: + if (propka.calculations.squared_distance(atom1, atom2) + < self.default_dist_squared): self.make_bond(atom1, atom2) def find_bonds_for_residue_backbone(self, residue): @@ -161,17 +170,18 @@ class BondMaker: """ for atom1 in residue.atoms: if atom1.name in list(self.num_pi_elec_bonds_backbone.keys()): - atom1.num_pi_elec_2_3_bonds \ - = self.num_pi_elec_bonds_backbone[atom1.name] - if atom1.name in \ - list(self.num_pi_elec_conj_bonds_backbone.keys()) \ - and len(atom1.bonded_atoms) > 1: # last part to avoid including N-term - atom1.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_backbone[atom1.name] + atom1.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_backbone[atom1.name]) + if atom1.name in ( + list(self.num_pi_elec_conj_bonds_backbone.keys()) + and len(atom1.bonded_atoms) > 1): # avoid N-term + atom1.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_backbone[atom1.name]) if atom1.name in self.backbone_atoms: for atom2 in residue.atoms: - if atom2.name in self.intra_residue_backbone_bonds[atom1.name]: + if atom2.name in ( + self.intra_residue_backbone_bonds[atom1.name]): self.make_bond(atom1, atom2) def find_bonds_for_side_chain(self, atoms): @@ -183,16 +193,17 @@ class BondMaker: for atom1 in atoms: key = '%s-%s' % (atom1.res_name, atom1.name) if key in list(self.num_pi_elec_bonds_sidechains.keys()): - atom1.num_pi_elec_2_3_bonds \ - = self.num_pi_elec_bonds_sidechains[key] + atom1.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_sidechains[key]) if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()): - atom1.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_sidechains[key] - + atom1.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_sidechains[key]) if not atom1.name in self.backbone_atoms: if not atom1.name in self.terminal_oxygen_names: for atom2 in atoms: - if atom2.name in self.protein_bonds[atom1.res_name][atom1.name]: + if atom2.name in ( + self + .protein_bonds[atom1.res_name][atom1.name]): self.make_bond(atom1, atom2) def find_bonds_for_ligand(self, ligand): @@ -216,25 +227,30 @@ class BondMaker: # for ligands if atom.type == 'hetatm': if atom.sybyl_type in self.num_pi_elec_bonds_ligands.keys(): - atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_ligands[atom.sybyl_type] - if atom.sybyl_type in self.num_pi_elec_conj_bonds_ligands.keys(): - atom.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type] + atom.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_ligands[atom.sybyl_type]) + if atom.sybyl_type in ( + self.num_pi_elec_conj_bonds_ligands.keys()): + atom.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type]) # for protein if atom.type == 'atom': key = '%s-%s' % (atom.res_name, atom.name) if key in list(self.num_pi_elec_bonds_sidechains.keys()): - atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_sidechains[key] + atom.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_sidechains[key]) if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()): - atom.num_pi_elec_conj_2_3_bonds = self.num_pi_elec_conj_bonds_sidechains[key] - + atom.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_sidechains[key]) if atom.name in list(self.num_pi_elec_bonds_backbone.keys()): - atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_backbone[atom.name] - if atom.name in list(self.num_pi_elec_conj_bonds_backbone.keys()) \ - and len(atom.bonded_atoms) > 1: + atom.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_backbone[atom.name]) + if atom.name in list( + self.num_pi_elec_conj_bonds_backbone.keys()) and ( + len(atom.bonded_atoms) > 1): # last part to avoid including N-term - atom.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_backbone[atom.name] + atom.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_backbone[atom.name]) def find_bonds_for_protein_by_distance(self, molecule): """Finds bonds for all atoms in the molecule. @@ -302,7 +318,8 @@ class BondMaker: molecules: list of molecules for finding bonds. """ for name in molecules.conformation_names: - self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].atoms) + self.find_bonds_for_atoms_using_boxes( + molecules.conformations[name].atoms) def add_pi_electron_information(self, molecules): """Add pi electron information to a molecule. @@ -311,7 +328,8 @@ class BondMaker: molecules: list of molecules for adding pi electron information. """ for name in molecules.conformation_names: - self.add_pi_electron_table_info(molecules.conformations[name].atoms) + self.add_pi_electron_table_info( + molecules.conformations[name].atoms) def find_bonds_for_atoms_using_boxes(self, atoms): """Finds all bonds for a list of atoms. @@ -417,10 +435,12 @@ class BondMaker: name_i = atom.name resi_j = bonded_atom.res_name name_j = bonded_atom.name - if not name_i in self.backbone_atoms or\ - not name_j in self.backbone_atoms: - if not name_i in self.terminal_oxygen_names and\ - not name_j in self.terminal_oxygen_names: + if not name_i in ( + self.backbone_atoms + or not name_j in self.backbone_atoms): + if not name_i in ( + self.terminal_oxygen_names + and not name_j in self.terminal_oxygen_names): if not resi_i in list(self.protein_bonds.keys()): self.protein_bonds[resi_i] = {} if not name_i in self.protein_bonds[resi_i]: From 1ce44b77bf0536a21da2a7e7937f8fc5d0e8acbd Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 17:33:16 -0700 Subject: [PATCH 37/65] Clean up line breaks in calculations.py See https://github.com/jensengroup/propka-3.1/issues/43. --- propka/calculations.py | 64 ++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index 711cd60..b507764 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -259,14 +259,14 @@ def add_amd_hydrogen(residue): o_atom = None n_atom = None for atom in residue: - if (atom.res_name == "GLN" and atom.name == "CD") \ - or (atom.res_name == "ASN" and atom.name == "CG"): + if ((atom.res_name == "GLN" and atom.name == "CD") + or (atom.res_name == "ASN" and atom.name == "CG")): c_atom = atom - elif (atom.res_name == "GLN" and atom.name == "OE1") \ - or (atom.res_name == "ASN" and atom.name == "OD1"): + elif ((atom.res_name == "GLN" and atom.name == "OE1") + or (atom.res_name == "ASN" and atom.name == "OD1")): o_atom = atom - elif (atom.res_name == "GLN" and atom.name == "NE2") \ - or (atom.res_name == "ASN" and atom.name == "ND2"): + elif ((atom.res_name == "GLN" and atom.name == "NE2") + or (atom.res_name == "ASN" and atom.name == "ND2")): n_atom = atom if (c_atom is None) or (o_atom is None) or (n_atom is None): errstr = "Unable to find all atoms for %s %s" % (residue[0].res_name, @@ -445,8 +445,8 @@ def radial_volume_desolvation(parameters, group): min_dist_4th = MIN_DISTANCE_4TH for atom in all_atoms: # ignore atoms in the same residue - if atom.res_num == group.atom.res_num \ - and atom.chain_id == group.atom.chain_id: + if (atom.res_num == group.atom.res_num + and atom.chain_id == group.atom.chain_id): continue sq_dist = squared_distance(group, atom) # desolvation @@ -466,8 +466,9 @@ def radial_volume_desolvation(parameters, group): group.buried = calculate_weight(parameters, group.num_volume) scale_factor = calculate_scale_factor(parameters, group.buried) volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) - group.energy_volume = group.charge * parameters.desolvationPrefactor \ - * volume_after_allowance * scale_factor + group.energy_volume = ( + group.charge * parameters.desolvationPrefactor + * volume_after_allowance * scale_factor) def calculate_scale_factor(parameters, weight): @@ -494,8 +495,9 @@ def calculate_weight(parameters, num_volume): Returns: desolvation weight """ - weight = float(num_volume - parameters.Nmin) \ - / float(parameters.Nmax - parameters.Nmin) + weight = ( + float(num_volume - parameters.Nmin) + / float(parameters.Nmax - parameters.Nmin)) weight = min(1.0, weight) weight = max(0.0, weight) return weight @@ -647,8 +649,8 @@ def hydrogen_bond_interaction(group1, group2, version): # Do nothing, value should have been assigned. pass else: - value = version.calculate_side_chain_energy(dist, dpka_max, cutoff, weight, - f_angle) + value = version.calculate_side_chain_energy( + dist, dpka_max, cutoff, weight, f_angle) return value @@ -712,15 +714,16 @@ def coulomb_energy(dist, weight, parameters): """ diel = UNK_DIELECTRIC1 - (UNK_DIELECTRIC1 - UNK_DIELECTRIC2)*weight dist = max(dist, parameters.coulomb_cutoff1) - scale = (dist - parameters.coulomb_cutoff2)/(parameters.coulomb_cutoff1 \ - - parameters.coulomb_cutoff2) + scale = ( + (dist - parameters.coulomb_cutoff2) + / (parameters.coulomb_cutoff1 - parameters.coulomb_cutoff2)) scale = max(0.0, scale) scale = min(1.0, scale) dpka = UNK_PKA_SCALING1/(diel*dist)*scale return abs(dpka) -def backbone_reorganization(parameters, conformation): +def backbone_reorganization(_, conformation): """Perform calculations related to backbone reorganizations. NOTE - this was described in the code as "adding test stuff" @@ -728,7 +731,7 @@ def backbone_reorganization(parameters, conformation): TODO - figure out why a similar function exists in version.py Args: - parameters: not used + _: not used conformation: specific molecule conformation """ titratable_groups = conformation.get_backbone_reorganisation_groups() @@ -744,9 +747,10 @@ def backbone_reorganization(parameters, conformation): atom3=bbc_group.atom, center=center) if dist < UNK_BACKBONE_DISTANCE1 and f_angle > UNK_FANGLE_MIN: - value = 1.0 - (dist-UNK_BACKBONE_DISTANCE2) \ - / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2) - dpka += UNK_PKA_SCALING2*min(1.0, value) + value = ( + 1.0 - (dist-UNK_BACKBONE_DISTANCE2) + / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2)) + dpka += UNK_PKA_SCALING2 * min(1.0, value) titratable_group.energy_local = dpka*weight @@ -774,14 +778,14 @@ def check_exceptions(version, group1, group2): exception, value = check_coo_coo_exception(group1, group2, version) elif (res_type1 == "CYS") and (res_type2 == "CYS"): exception, value = check_cys_cys_exception(group1, group2, version) - elif (res_type1 == "COO") and (res_type2 == "HIS") or \ - (res_type1 == "HIS") and (res_type2 == "COO"): + elif ((res_type1 == "COO") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "COO")): exception, value = check_coo_his_exception(group1, group2, version) - elif (res_type1 == "OCO") and (res_type2 == "HIS") or \ - (res_type1 == "HIS") and (res_type2 == "OCO"): + elif ((res_type1 == "OCO") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "OCO")): exception, value = check_oco_his_exception(group1, group2, version) - elif (res_type1 == "CYS") and (res_type2 == "HIS") or \ - (res_type1 == "HIS") and (res_type2 == "CYS"): + elif ((res_type1 == "CYS") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "CYS")): exception, value = check_cys_his_exception(group1, group2, version) else: # do nothing, no exception for this pair @@ -933,8 +937,8 @@ def check_buried(num_volume1, num_volume2): Returns: True if interaction is buried, False otherwise """ - if (num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX) \ - and (num_volume1 <= SEPARATE_NUM_BURIED_MAX \ - or num_volume2 <= SEPARATE_NUM_BURIED_MAX): + if ((num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX) + and (num_volume1 <= SEPARATE_NUM_BURIED_MAX + or num_volume2 <= SEPARATE_NUM_BURIED_MAX)): return False return True From 284a33dfe8100dcdf32eab13e4aacb3109c6e1d6 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 17:49:52 -0700 Subject: [PATCH 38/65] Clean up line breaks. See https://github.com/jensengroup/propka-3.1/issues/43. --- propka/conformation_container.py | 105 ++++++++++++++++++------------- 1 file changed, 62 insertions(+), 43 deletions(-) diff --git a/propka/conformation_container.py b/propka/conformation_container.py index bc0698c..e81b8ac 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -57,9 +57,10 @@ class ConformationContainer: """Generate interaction map and charge centers.""" # if a group is coupled and we are reading a .propka_input file, then # some more configuration might be needed - map_ = make_interaction_map('Covalent coupling map for %s' % self, - self.get_covalently_coupled_groups(), - lambda g1, g2: g1 in g2.covalently_coupled_groups) + map_ = make_interaction_map( + 'Covalent coupling map for %s' % self, + self.get_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.covalently_coupled_groups) info(map_) # check if we should set a common charge centre as well if self.parameters.common_charge_centre: @@ -67,14 +68,15 @@ class ConformationContainer: def set_common_charge_centres(self): """Assign charge centers to groups.""" - for system in self.get_coupled_systems(self.get_covalently_coupled_groups(), - Group.get_covalently_coupled_groups): + for system in self.get_coupled_systems( + self.get_covalently_coupled_groups(), + Group.get_covalently_coupled_groups): # make a list of the charge centre coordinates all_coordinates = list(map(lambda g: [g.x, g.y, g.z], system)) # find the common charge center - ccc = functools.reduce(lambda g1, g2: [g1[0]+g2[0], g1[1]+g2[1], - g1[2]+g2[2]], - all_coordinates) + ccc = functools.reduce( + lambda g1, g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]], + all_coordinates) ccc = list(map(lambda c: c/len(system), ccc)) # set the ccc for all coupled groups in this system for group in system: @@ -85,8 +87,8 @@ class ConformationContainer: """Find covalently coupled groups and set common charge centres.""" for group in self.get_titratable_groups(): # Find covalently bonded groups - bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, - group.atom) + bonded_groups = self.find_bonded_titratable_groups( + group.atom, 1, group.atom) # coupled groups for bond_group in bonded_groups: if bond_group in group.covalently_coupled_groups: @@ -97,9 +99,10 @@ class ConformationContainer: if self.parameters.common_charge_centre: self.set_common_charge_centres() # print coupling map - map_ = make_interaction_map('Covalent coupling map for %s' % self, - self.get_covalently_coupled_groups(), - lambda g1, g2: g1 in g2.covalently_coupled_groups) + map_ = make_interaction_map( + 'Covalent coupling map for %s' % self, + self.get_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.covalently_coupled_groups) info(map_) def find_non_covalently_coupled_groups(self, verbose=False): @@ -134,14 +137,14 @@ class ConformationContainer: if bond_atom == original_atom: continue # check if this atom has a titratable group - if bond_atom.group and bond_atom.group.titratable \ - and num_bonds <= self.parameters.coupling_max_number_of_bonds: + if (bond_atom.group and bond_atom.group.titratable + and num_bonds + <= self.parameters.coupling_max_number_of_bonds): res.add(bond_atom.group) # check for titratable groups bonded to this atom if num_bonds < self.parameters.coupling_max_number_of_bonds: - res |= self.find_bonded_titratable_groups(bond_atom, - num_bonds+1, - original_atom) + res |= self.find_bonded_titratable_groups( + bond_atom, num_bonds+1, original_atom) return res def setup_and_add_group(self, group): @@ -190,22 +193,23 @@ class ConformationContainer: for group in self.get_titratable_groups() + self.get_ions(): version.calculate_desolvation(group) # calculate backbone interactions - set_backbone_determinants(self.get_titratable_groups(), - self.get_backbone_groups(), version) + set_backbone_determinants( + self.get_titratable_groups(), self.get_backbone_groups(), version) # setting ion determinants set_ion_determinants(self, version) # calculating the back-bone reorganization/desolvation term version.calculate_backbone_reorganization(self) # setting remaining non-iterative and iterative side-chain & Coulomb # interaction determinants - set_determinants(self.get_sidechain_groups(), version=version, - options=options) + set_determinants( + self.get_sidechain_groups(), version=version, options=options) # calculating the total pKa values for group in self.groups: group.calculate_total_pka() # take coupling effects into account penalised_labels = self.coupling_effects() - if self.parameters.remove_penalised_group and len(penalised_labels) > 0: + if (self.parameters.remove_penalised_group + and len(penalised_labels) > 0): info('Removing penalised groups!!!') for group in self.get_titratable_groups(): group.remove_determinants(penalised_labels) @@ -227,8 +231,9 @@ class ConformationContainer: titrate. """ penalised_labels = [] - for all_groups in self.get_coupled_systems(self.get_covalently_coupled_groups(), - Group.get_covalently_coupled_groups): + for all_groups in self.get_coupled_systems( + self.get_covalently_coupled_groups(), + Group.get_covalently_coupled_groups): # check if we should share determinants if self.parameters.shared_determinants: self.share_determinants(all_groups) @@ -236,15 +241,19 @@ class ConformationContainer: first_group = max(all_groups, key=lambda g: g.pka_value) # In case of acids if first_group.charge < 0: - first_group.coupled_titrating_group = min(all_groups, key=lambda g: g.pka_value) - penalised_labels.append(first_group.label) # group with the highest pKa is penalised + first_group.coupled_titrating_group = min( + all_groups, key=lambda g: g.pka_value) + # group with the highest pKa is penalised + penalised_labels.append(first_group.label) # In case of bases else: for group in all_groups: if group == first_group: - continue # group with the highest pKa is allowed to titrate... + # group with the highest pKa is allowed to titrate... + continue group.coupled_titrating_group = first_group - penalised_labels.append(group.label) #... and the rest is penalised + #... and the rest are penalised + penalised_labels.append(group.label) return penalised_labels @staticmethod @@ -287,7 +296,8 @@ class ConformationContainer: while len(groups) > 0: # extract a system of coupled groups ... system = set() - self.get_a_coupled_system_of_groups(groups.pop(), system, get_coupled_groups) + self.get_a_coupled_system_of_groups( + groups.pop(), system, get_coupled_groups) # ... and remove them from the list groups -= system yield system @@ -356,7 +366,8 @@ class ConformationContainer: Returns: list of groups """ - return [group for group in self.groups \ + return [ + group for group in self.groups if ('BB' not in group.type and not group.atom.cysteine_bridge)] def get_covalently_coupled_groups(self): @@ -365,7 +376,8 @@ class ConformationContainer: Returns: list of groups """ - return [g for g in self.groups \ + return [ + g for g in self.groups if len(g.covalently_coupled_groups) > 0] def get_non_covalently_coupled_groups(self): @@ -374,7 +386,8 @@ class ConformationContainer: Returns: list of groups """ - return [g for g in self.groups \ + return [ + g for g in self.groups if len(g.non_covalently_coupled_groups) > 0] def get_backbone_nh_groups(self): @@ -401,7 +414,8 @@ class ConformationContainer: Returns: list of groups """ - return [group for group in self.groups if group.residue_type == residue] + return [ + group for group in self.groups if group.residue_type == residue] def get_titratable_groups(self): """Get all titratable groups needed for pKa calculations. @@ -429,8 +443,9 @@ class ConformationContainer: Returns: list of groups """ - return [group for group in self.groups \ - if (group.residue_type in self.parameters.acid_list \ + return [ + group for group in self.groups + if (group.residue_type in self.parameters.acid_list and not group.atom.cysteine_bridge)] def get_backbone_reorganisation_groups(self): @@ -439,8 +454,10 @@ class ConformationContainer: Returns: list of groups """ - return [group for group in self.groups \ - if (group.residue_type in self.parameters.backbone_reorganisation_list \ + return [ + group for group in self.groups + if (group.residue_type + in self.parameters.backbone_reorganisation_list and not group.atom.cysteine_bridge)] def get_ions(self): @@ -449,7 +466,8 @@ class ConformationContainer: Returns: list of groups """ - return [group for group in self.groups \ + return [ + group for group in self.groups if group.residue_type in self.parameters.ions.keys()] def get_group_names(self, group_list): @@ -476,7 +494,8 @@ class ConformationContainer: Returns: list of atoms """ - return [atom for atom in self.atoms \ + return [ + atom for atom in self.atoms if atom.type == 'hetatm' and atom.element != 'H'] def get_chain(self, chain): @@ -556,9 +575,9 @@ class ConformationContainer: def __str__(self): """String that lists statistics of atoms and groups.""" - str_ = 'Conformation container %s with %d atoms and %d groups' % (self.name, - len(self), - len(self.groups)) + str_ = ( + 'Conformation container %s with %d atoms and %d groups' + % (self.name, len(self), len(self.groups))) return str_ def __len__(self): From ba8362f5146f83219fcec09843c397f4410e22df Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 18:09:59 -0700 Subject: [PATCH 39/65] Clean up line breaks in coupled_groups.py See https://github.com/jensengroup/propka-3.1/issues/43. --- propka/coupled_groups.py | 137 ++++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 59 deletions(-) diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index 54197ab..7220304 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -28,8 +28,8 @@ class NonCovalentlyCoupledGroups: # check if the interaction energy is high enough interaction_energy = max(self.get_interaction(group1, group2), self.get_interaction(group2, group1)) - if interaction_energy <= self.parameters.min_interaction_energy \ - and return_on_fail: + if (interaction_energy <= self.parameters.min_interaction_energy + and return_on_fail): return {'coupling_factor': -1.0} # calculate intrinsic pKa's, if not already done for group in [group1, group2]: @@ -43,8 +43,8 @@ class NonCovalentlyCoupledGroups: default_pka1 = group1.pka_value default_pka2 = group2.pka_value # check that pka values are within relevant limits - if max(default_pka1, default_pka2) < self.parameters.min_pka or \ - min(default_pka1, default_pka2) > self.parameters.max_pka: + if (max(default_pka1, default_pka2) < self.parameters.min_pka + or min(default_pka1, default_pka2) > self.parameters.max_pka): if return_on_fail: return {'coupling_factor': -1.0} # Swap interactions and re-calculate pKa values @@ -52,7 +52,8 @@ class NonCovalentlyCoupledGroups: group1.calculate_total_pka() group2.calculate_total_pka() # store swapped energy and pka's - swapped_energy = energy_method(ph=use_ph, reference=self.parameters.reference) + swapped_energy = energy_method( + ph=use_ph, reference=self.parameters.reference) swapped_pka1 = group1.pka_value swapped_pka2 = group2.pka_value pka_shift1 = swapped_pka1 - default_pka1 @@ -62,21 +63,23 @@ class NonCovalentlyCoupledGroups: group1.calculate_total_pka() group2.calculate_total_pka() # check difference in free energy - if abs(default_energy - swapped_energy) > self.parameters.max_free_energy_diff \ - and return_on_fail: + if (abs(default_energy - swapped_energy) + > self.parameters.max_free_energy_diff and return_on_fail): return {'coupling_factor': -1.0} # check pka shift - if max(abs(pka_shift1), abs(pka_shift2)) < self.parameters.min_swap_pka_shift \ - and return_on_fail: + if (max(abs(pka_shift1), abs(pka_shift2)) + < self.parameters.min_swap_pka_shift and return_on_fail): return {'coupling_factor': -1.0} # check intrinsic pka diff - if abs(group1.intrinsic_pka - group2.intrinsic_pka) \ - > self.parameters.max_intrinsic_pka_diff and return_on_fail: + if (abs(group1.intrinsic_pka - group2.intrinsic_pka) + > self.parameters.max_intrinsic_pka_diff and return_on_fail): return {'coupling_factor': -1.0} # if everything is OK, calculate the coupling factor and return all info - factor = self.get_free_energy_diff_factor(default_energy, swapped_energy) \ - * self.get_pka_diff_factor(group1.intrinsic_pka, group2.intrinsic_pka) \ - * self.get_interaction_factor(interaction_energy) + factor = ( + self.get_free_energy_diff_factor(default_energy, swapped_energy) + * self.get_pka_diff_factor(group1.intrinsic_pka, + group2.intrinsic_pka) + * self.get_interaction_factor(interaction_energy)) return {'coupling_factor': factor, 'default_energy': default_energy, 'swapped_energy': swapped_energy, 'interaction_energy': interaction_energy, @@ -96,7 +99,9 @@ class NonCovalentlyCoupledGroups: intrinsic_pka_diff = abs(pka1-pka2) res = 0.0 if intrinsic_pka_diff <= self.parameters.max_intrinsic_pka_diff: - res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pka_diff)**2 + res = ( + 1-(intrinsic_pka_diff + /self.parameters.max_intrinsic_pka_diff)**2) return res def get_free_energy_diff_factor(self, energy1, energy2): @@ -125,8 +130,10 @@ class NonCovalentlyCoupledGroups: res = 0.0 interaction_energy = abs(interaction_energy) if interaction_energy >= self.parameters.min_interaction_energy: - res = (interaction_energy-self.parameters.min_interaction_energy) \ - / (1.0+interaction_energy-self.parameters.min_interaction_energy) + res = ( + (interaction_energy-self.parameters.min_interaction_energy) + / (1.0+interaction_energy + -self.parameters.min_interaction_energy)) return res def identify_non_covalently_coupled_groups(self, conformation, @@ -140,24 +147,29 @@ class NonCovalentlyCoupledGroups: self.parameters = conformation.parameters if verbose: info('') - info(' Warning: When using the -d option, pKa values based on \'swapped\' interactions') + info(' Warning: When using the -d option, pKa values based on ' + '\'swapped\' interactions') info(' will be writting to the output .pka file') info('') info('-' * 103) info(' Detecting non-covalently coupled residues') info('-' * 103) - info(' Maximum pKa difference: %4.2f pKa units' \ - % self.parameters.max_intrinsic_pka_diff) - info(' Minimum interaction energy: %4.2f pKa units' \ - % self.parameters.min_interaction_energy) - info(' Maximum free energy diff.: %4.2f pKa units' \ - % self.parameters.max_free_energy_diff) - info(' Minimum swap pKa shift: %4.2f pKa units' \ - % self.parameters.min_swap_pka_shift) - info(' pH: %6s ' % str(self.parameters.pH)) - info(' Reference: %s' % self.parameters.reference) - info(' Min pKa: %4.2f' % self.parameters.min_pka) - info(' Max pKa: %4.2f' % self.parameters.max_pka) + info(' Maximum pKa difference: %4.2f pKa units' + % self.parameters.max_intrinsic_pka_diff) + info(' Minimum interaction energy: %4.2f pKa units' + % self.parameters.min_interaction_energy) + info(' Maximum free energy diff.: %4.2f pKa units' + % self.parameters.max_free_energy_diff) + info(' Minimum swap pKa shift: %4.2f pKa units' + % self.parameters.min_swap_pka_shift) + info(' pH: %6s ' + % str(self.parameters.pH)) + info(' Reference: %s' + % self.parameters.reference) + info(' Min pKa: %4.2f' + % self.parameters.min_pka) + info(' Max pKa: %4.2f' + % self.parameters.max_pka) info('') # find coupled residues titratable_groups = conformation.get_titratable_groups() @@ -166,13 +178,13 @@ class NonCovalentlyCoupledGroups: for group2 in titratable_groups: if group1 == group2: break - if not group1 in group2.non_covalently_coupled_groups \ - and self.do_prot_stat: - data = self.\ - is_coupled_protonation_state_probability(group1, - group2, - conformation.\ - calculate_folding_energy) + if (group1 not in group2.non_covalently_coupled_groups + and self.do_prot_stat): + data = ( + self + .is_coupled_protonation_state_probability( + group1, group2, + conformation.calculate_folding_energy)) if data['coupling_factor'] > 0.0: group1.couple_non_covalently(group2) if verbose: @@ -184,12 +196,13 @@ class NonCovalentlyCoupledGroups: Args: conformation: conformation to print """ - map_ = make_interaction_map('Non-covalent coupling map for %s' % conformation, - conformation.get_non_covalently_coupled_groups(), - lambda g1, g2: g1 in g2.non_covalently_coupled_groups) + map_ = make_interaction_map( + 'Non-covalent coupling map for %s' % conformation, + conformation.get_non_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.non_covalently_coupled_groups) info(map_) - for system in conformation.get_coupled_systems(conformation.\ - get_non_covalently_coupled_groups(), \ + for system in conformation.get_coupled_systems( + conformation.get_non_covalently_coupled_groups(), Group.get_non_covalently_coupled_groups): self.print_system(conformation, list(system)) @@ -206,11 +219,14 @@ class NonCovalentlyCoupledGroups: # print out coupling info for each interaction coup_info = '' for interaction in interactions: - data = self.is_coupled_protonation_state_probability(interaction[0], \ - interaction[1], conformation.calculate_folding_energy, \ - return_on_fail=False) - coup_info += self.make_data_to_string(data, interaction[0], \ - interaction[1]) + '\n\n' + data = ( + self.is_coupled_protonation_state_probability( + interaction[0], interaction[1], + conformation.calculate_folding_energy, + return_on_fail=False)) + coup_info += ( + self.make_data_to_string(data, interaction[0], interaction[1]) + + '\n\n') info(coup_info) # make list of possible combinations of swap to try out combinations = propka.lib.generate_combinations(interactions) @@ -236,14 +252,15 @@ class NonCovalentlyCoupledGroups: Args: group1: first group for interaction group2: second group for interaction - include_side_chain_hbs: include side-chain hydrogen bonds in energy + include_side_chain_hbs: include sidechain hydrogen bonds in energy Returns: interaction energy (float) """ determinants = group1.determinants['coulomb'] if include_side_chain_hbs: - determinants = group1.determinants['sidechain'] \ - + group1.determinants['coulomb'] + determinants = ( + group1.determinants['sidechain'] + + group1.determinants['coulomb']) interaction_energy = 0.0 for det in determinants: if group2 == det.group: @@ -346,19 +363,21 @@ class NonCovalentlyCoupledGroups: Returns: formatted string with information. """ - str_ = \ + str_ = ( """ %s and %s coupled (prot.state): %5.2f Energy levels: %6.2f, %6.2f (difference: %6.2f) at pH %6.2f Interaction energy: %6.2f Intrinsic pka's: %6.2f, %6.2f (difference: %6.2f) - Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)""" % \ - (group1.label, group2.label, data['coupling_factor'], - data['default_energy'], data['swapped_energy'], - data['default_energy'] - data['swapped_energy'], data['pH'], - data['interaction_energy'], group1.intrinsic_pka, group2.intrinsic_pka, - group1.intrinsic_pka-group2.intrinsic_pka, data['swapped_pka1'], - data['swapped_pka2'], data['pka_shift1'], data['pka_shift2']) - + Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)""" + % ( + group1.label, group2.label, data['coupling_factor'], + data['default_energy'], data['swapped_energy'], + data['default_energy'] - data['swapped_energy'], + data['pH'], data['interaction_energy'], + group1.intrinsic_pka, group2.intrinsic_pka, + group1.intrinsic_pka-group2.intrinsic_pka, + data['swapped_pka1'], data['swapped_pka2'], + data['pka_shift1'], data['pka_shift2'])) return str_ From 891fecaab3a2d435212316cee7f3990f1e678c93 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 18:15:46 -0700 Subject: [PATCH 40/65] Clean up line breaks in determinants.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/determinants.py | 84 ++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/propka/determinants.py b/propka/determinants.py index fa80763..9a82e33 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -39,13 +39,13 @@ def set_determinants(propka_groups, version=None, options=None): break distance = propka.calculations.distance(group1, group2) if distance < version.parameters.coulomb_cutoff2: - interaction_type = version.parameters.interaction_matrix.get_value(group1.type, - group2.type) + interaction_type = ( + version.parameters.interaction_matrix.get_value( + group1.type, group2.type)) if interaction_type == 'I': - propka.iterative.add_to_determinant_list(group1, group2, - distance, - iterative_interactions, - version=version) + propka.iterative.add_to_determinant_list( + group1, group2, distance, iterative_interactions, + version=version) elif interaction_type == 'N': add_determinants(group1, group2, distance, version) # --- Iterative section ---# @@ -53,7 +53,7 @@ def set_determinants(propka_groups, version=None, options=None): def add_determinants(group1, group2, distance, version): - """Add determinants and perturbations for distance(R1, R2) < coulomb_cutoff. + """Add determinants and perturbations for distance(R1,R2) < coulomb_cutoff. Args: group1: first group to add @@ -88,8 +88,10 @@ def add_sidechain_determinants(group1, group2, version=None): new_determinant1 = Determinant(group2, hbond_interaction) new_determinant2 = Determinant(group1, -hbond_interaction) else: - new_determinant1 = Determinant(group2, hbond_interaction*group1.charge) - new_determinant2 = Determinant(group1, hbond_interaction*group2.charge) + new_determinant1 = Determinant( + group2, hbond_interaction*group1.charge) + new_determinant2 = Determinant( + group1, hbond_interaction*group2.charge) group1.determinants['sidechain'].append(new_determinant1) group2.determinants['sidechain'].append(new_determinant2) @@ -103,8 +105,8 @@ def add_coulomb_determinants(group1, group2, distance, version): distance: distance between groups version: version object """ - coulomb_interaction = version.electrostatic_interaction(group1, group2, - distance) + coulomb_interaction = version.electrostatic_interaction( + group1, group2, distance) if coulomb_interaction: q1 = group1.charge q2 = group2.charge @@ -187,13 +189,14 @@ def set_ion_determinants(conformation_container, version): for ion_group in conformation_container.get_ions(): dist_sq = squared_distance(titratable_group, ion_group) if dist_sq < version.parameters.coulomb_cutoff2_squared: - weight = version.calculate_pair_weight(titratable_group.num_volume, - ion_group.num_volume) + weight = version.calculate_pair_weight( + titratable_group.num_volume, ion_group.num_volume) # the pKa of both acids and bases are shifted up by negative # ions (and vice versa) - value = (-ion_group.charge) \ - * version.calculate_coulomb_energy(math.sqrt(dist_sq), - weight) + value = ( + -ion_group.charge + * version.calculate_coulomb_energy( + math.sqrt(dist_sq), weight)) new_det = Determinant(ion_group, value) titratable_group.determinants['coulomb'].append(new_det) @@ -207,24 +210,26 @@ def set_backbone_determinants(titratable_groups, backbone_groups, version): version: version object """ for titratable_group in titratable_groups: - titratable_group_interaction_atoms \ - = titratable_group.interaction_atoms_for_acids + titratable_group_interaction_atoms = ( + titratable_group.interaction_atoms_for_acids) if not titratable_group_interaction_atoms: continue # find out which backbone groups this titratable is interacting with for backbone_group in backbone_groups: # find the interacting atoms - backbone_interaction_atoms \ - = backbone_group.get_interaction_atoms(titratable_group) + backbone_interaction_atoms = ( + backbone_group.get_interaction_atoms(titratable_group)) if not backbone_interaction_atoms: continue # find the smallest distance - [backbone_atom, distance, titratable_atom] \ - = get_smallest_distance(backbone_interaction_atoms, \ - titratable_group_interaction_atoms) + [backbone_atom, distance, titratable_atom] = ( + get_smallest_distance( + backbone_interaction_atoms, + titratable_group_interaction_atoms)) # get the parameters - parameters = version.get_backbone_hydrogen_bond_parameters(backbone_atom, - titratable_atom) + parameters = ( + version.get_backbone_hydrogen_bond_parameters( + backbone_atom, titratable_atom)) if not parameters: continue [dpka_max, [cutoff1, cutoff2]] = parameters @@ -241,14 +246,14 @@ def set_backbone_determinants(titratable_groups, backbone_groups, version): # || # C if backbone_group.type == 'BBC': - if titratable_group.type \ - in version.parameters.angular_dependent_sidechain_interactions: + if (titratable_group.type + in version.parameters.angular_dependent_sidechain_interactions): if titratable_atom.element == 'H': heavy_atom = titratable_atom.bonded_atoms[0] hydrogen_atom = titratable_atom - [_, f_angle, _] = angle_distance_factors(atom1=heavy_atom, - atom2=hydrogen_atom, - atom3=backbone_atom) + [_, f_angle, _] = angle_distance_factors( + atom1=heavy_atom, atom2=hydrogen_atom, + atom3=backbone_atom) else: # Either the structure is corrupt (no hydrogen), # or the heavy atom is closer to the titratable @@ -267,19 +272,20 @@ def set_backbone_determinants(titratable_groups, backbone_groups, version): if backbone_atom.element == 'H': backbone_n = backbone_atom.bonded_atoms[0] backbone_h = backbone_atom - [_, f_angle, _] = angle_distance_factors(atom1=titratable_atom, - atom2=backbone_h, - atom3=backbone_n) + [_, f_angle, _] = ( + angle_distance_factors( + atom1=titratable_atom, atom2=backbone_h, + atom3=backbone_n)) else: # Either the structure is corrupt (no hydrogen), or the # heavy atom is closer to the titratable atom than the # hydrogen. In either case we set the angle factor to 0 f_angle = 0.0 if f_angle > FANGLE_MIN: - value = titratable_group.charge * hydrogen_bond_energy(distance, - dpka_max, - [cutoff1, cutoff2], - f_angle) + value = ( + titratable_group.charge + * hydrogen_bond_energy( + distance, dpka_max, [cutoff1, cutoff2], f_angle)) new_determinant = Determinant(backbone_group, value) - titratable_group.determinants['backbone'].append(new_determinant) - \ No newline at end of file + titratable_group.determinants['backbone'].append( + new_determinant) From 919305d8a3027ea15e5b6d7424fa59b8fb4524e1 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 18:48:24 -0700 Subject: [PATCH 41/65] Clean up line breaks in group.py and hybrid36.py https://github.com/jensengroup/propka-3.1/issues/43 --- propka/group.py | 207 +++++++++++++++++++++++---------------------- propka/hybrid36.py | 2 +- 2 files changed, 108 insertions(+), 101 deletions(-) diff --git a/propka/group.py b/propka/group.py index fa746bf..24cb852 100644 --- a/propka/group.py +++ b/propka/group.py @@ -9,40 +9,26 @@ from propka.lib import info, warning # Constants that start with "UNK_" are a mystery to me UNK_PKA_SCALING = -1.36 PROTONATOR = propka.protonate.Protonate(verbose=False) -EXPECTED_ATOMS_ACID_INTERACTIONS = {'COO': {'O': 2}, 'HIS': {'H': 2, 'N': 2}, - 'CYS': {'S': 1}, 'TYR': {'O': 1}, - 'LYS': {'N': 1}, 'ARG': {'H': 5, 'N': 3}, - 'ROH': {'O': 1}, 'AMD': {'H': 2, 'N': 1}, - 'TRP': {'H': 1, 'N': 1}, 'N+': {'N': 1}, - 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1,}, - 'BBC': {'O': 1}, 'NAR': {'H': 1, 'N': 1}, - 'NAM': {'H': 1, 'N': 1}, 'F': {'F': 1}, - 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, - 'OP': {'O': 1}, 'O3': {'O': 1}, - 'O2': {'O': 1}, 'SH': {'S': 1}, - 'CG': {'H': 5, 'N': 3}, - 'C2N': {'H': 4, 'N': 2}, 'OCO': {'O': 2}, - 'N30': {'H': 4, 'N': 1}, - 'N31': {'H': 3, 'N': 1}, - 'N32': {'H': 2, 'N': 1}, - 'N33': {'H': 1, 'N': 1}, - 'NP1': {'H': 2, 'N': 1}, 'N1': {'N': 1}} -EXPECTED_ATOMS_BASE_INTERACTIONS = {'COO': {'O': 2}, 'HIS': {'N': 2}, - 'CYS': {'S': 1}, 'TYR': {'O': 1}, - 'LYS': {'N': 1}, 'ARG': {'N': 3}, - 'ROH': {'O': 1}, 'AMD': {'O': 1}, - 'TRP': {'N': 1}, 'N+': {'N': 1}, - 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1}, - 'BBC': {'O': 1}, 'NAR': {'H': 1, 'N': 1}, - 'NAM': {'H': 1, 'N': 1}, 'F': {'F': 1}, - 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, - 'OP': {'O': 1}, 'O3': {'O': 1}, - 'O2': {'O': 1}, 'SH': {'S': 1}, - 'CG': {'N': 3}, 'C2N': {'N': 2}, - 'OCO': {'O': 2}, 'N30': {'N': 1}, - 'N31': {'N': 1}, 'N32': {'N': 1}, - 'N33': {'N': 1}, 'NP1': {'N': 1}, - 'N1': {'N': 1}} +EXPECTED_ATOMS_ACID_INTERACTIONS = { + 'COO': {'O': 2}, 'HIS': {'H': 2, 'N': 2}, 'CYS': {'S': 1}, 'TYR': {'O': 1}, + 'LYS': {'N': 1}, 'ARG': {'H': 5, 'N': 3}, 'ROH': {'O': 1}, + 'AMD': {'H': 2, 'N': 1}, 'TRP': {'H': 1, 'N': 1}, 'N+': {'N': 1}, + 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1,}, 'BBC': {'O': 1}, + 'NAR': {'H': 1, 'N': 1}, 'NAM': {'H': 1, 'N': 1}, 'F': {'F': 1}, + 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, 'OP': {'O': 1}, 'O3': {'O': 1}, + 'O2': {'O': 1}, 'SH': {'S': 1}, 'CG': {'H': 5, 'N': 3}, + 'C2N': {'H': 4, 'N': 2}, 'OCO': {'O': 2}, 'N30': {'H': 4, 'N': 1}, + 'N31': {'H': 3, 'N': 1}, 'N32': {'H': 2, 'N': 1}, 'N33': {'H': 1, 'N': 1}, + 'NP1': {'H': 2, 'N': 1}, 'N1': {'N': 1}} +EXPECTED_ATOMS_BASE_INTERACTIONS = { + 'COO': {'O': 2}, 'HIS': {'N': 2}, 'CYS': {'S': 1}, 'TYR': {'O': 1}, + 'LYS': {'N': 1}, 'ARG': {'N': 3}, 'ROH': {'O': 1}, 'AMD': {'O': 1}, + 'TRP': {'N': 1}, 'N+': {'N': 1}, 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1}, + 'BBC': {'O': 1}, 'NAR': {'H': 1, 'N': 1}, 'NAM': {'H': 1, 'N': 1}, + 'F': {'F': 1}, 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, 'OP': {'O': 1}, + 'O3': {'O': 1}, 'O2': {'O': 1}, 'SH': {'S': 1}, 'CG': {'N': 3}, + 'C2N': {'N': 2}, 'OCO': {'O': 2}, 'N30': {'N': 1}, 'N31': {'N': 1}, + 'N32': {'N': 1}, 'N33': {'N': 1}, 'NP1': {'N': 1}, 'N1': {'N': 1}} class Group: @@ -93,14 +79,12 @@ class Group: self.label = '%-3s%4d%2s' % (self.residue_type, atom.res_num, atom.chain_id) elif self.atom.res_name in ['DA ', 'DC ', 'DG ', 'DT ']: - self.label = '%1s%1s%1s%4d%2s' % (self.residue_type[1], - atom.element, - atom.name.replace('\'', '')[-1], - atom.res_num, - atom.chain_id) + self.label = '%1s%1s%1s%4d%2s' % ( + self.residue_type[1], atom.element, + atom.name.replace('\'', '')[-1], atom.res_num, atom.chain_id) else: - self.label = '%-3s%4s%2s' % (self.residue_type, atom.name, - atom.chain_id) + self.label = '%-3s%4s%2s' % ( + self.residue_type, atom.name, atom.chain_id) # container for squared distances self.squared_distances = {} @@ -180,8 +164,8 @@ class Group: added = True # otherwise we just add the determinant to our list if not added: - self.determinants[type_].append(Determinant(new_determinant.group, - new_determinant.value)) + self.determinants[type_].append( + Determinant(new_determinant.group, new_determinant.value)) def make_covalently_coupled_line(self): """Create line for covalent coupling. @@ -232,8 +216,9 @@ class Group: return self.label == other.label else: # For heterogene atoms we also need to check the residue number - return (self.label == other.label) \ - and (self.atom.res_num == other.atom.res_num) + return ( + (self.label == other.label) + and (self.atom.res_num == other.atom.res_num)) def __hash__(self): """Needed for creating sets of groups.""" @@ -241,8 +226,8 @@ class Group: def __iadd__(self, other): if self.type != other.type: - errstr = 'Cannot add groups of different types (%s and %s)' \ - % (self.type, other.type) + errstr = ('Cannot add groups of different types (%s and %s)' + % (self.type, other.type)) raise Exception(errstr) # add all values self.pka_value += other.pka_value @@ -301,8 +286,9 @@ class Group: labels: list of labels to remove """ for type_ in ['sidechain', 'backbone', 'coulomb']: - matches = list(filter(lambda d: d.label in labels, \ - [d for d in self.determinants[type_]])) + matches = list( + filter(lambda d: d.label + in labels, [d for d in self.determinants[type_]])) for match in matches: self.determinants[type_].remove(match) @@ -354,7 +340,9 @@ class Group: if not self.model_pka_set: self.model_pka = self.parameters.model_pkas[self.residue_type] # check if we should apply a custom model pka - key = '%s-%s'%(self.atom.res_name.strip(), self.atom.name.strip()) + key = ( + '%s-%s' + % (self.atom.res_name.strip(), self.atom.name.strip())) if key in self.parameters.custom_model_pkas.keys(): self.model_pka = self.parameters.custom_model_pkas[key] self.model_pka_set = True @@ -392,25 +380,31 @@ class Group: self.interaction_atoms_for_bases, 'base']]: if self.type in expect.keys(): for elem in expect[self.type].keys(): - if len([a for a in found if a.element == elem]) \ - != expect[self.type][elem]: + if (len([a for a in found if a.element == elem]) + != expect[self.type][elem]): ok = False if not ok: str_ = 'Missing atoms or failed protonation for ' - str_ += '%s (%s) -- please check the structure' % (self.label, - self.type) + str_ += ('%s (%s) -- please check the structure' + % (self.label, self.type)) warning(str_) warning('%s' % self) - num_acid = sum([EXPECTED_ATOMS_ACID_INTERACTIONS[self.type][e] \ - for e in EXPECTED_ATOMS_ACID_INTERACTIONS[self.type].keys()]) - num_base = sum([EXPECTED_ATOMS_BASE_INTERACTIONS[self.type][e] \ - for e in EXPECTED_ATOMS_BASE_INTERACTIONS[self.type].keys()]) - warning('Expected %d interaction atoms for acids, found:' % num_acid) + num_acid = sum( + [EXPECTED_ATOMS_ACID_INTERACTIONS[self.type][e] + for e in EXPECTED_ATOMS_ACID_INTERACTIONS[self.type].keys()]) + num_base = sum( + [EXPECTED_ATOMS_BASE_INTERACTIONS[self.type][e] + for e in EXPECTED_ATOMS_BASE_INTERACTIONS[self.type].keys()]) + warning( + 'Expected %d interaction atoms for acids, found:' % num_acid) for i in range(len(self.interaction_atoms_for_acids)): - warning(' %s' % self.interaction_atoms_for_acids[i]) - warning('Expected %d interaction atoms for bases, found:' % num_base) + warning( + ' %s' % self.interaction_atoms_for_acids[i]) + warning( + 'Expected %d interaction atoms for bases, found:' % num_base) for i in range(len(self.interaction_atoms_for_bases)): - warning(' %s' % self.interaction_atoms_for_bases[i]) + warning( + ' %s' % self.interaction_atoms_for_bases[i]) def get_interaction_atoms(self, interacting_group): """Get atoms involved in interaction with other group. @@ -507,7 +501,8 @@ class Group: if self.atom.cysteine_bridge: self.pka_value = 99.99 return - self.pka_value = self.model_pka + self.energy_volume + self.energy_local + self.pka_value = ( + self.model_pka + self.energy_volume + self.energy_local) for determinant_type in ['sidechain', 'backbone', 'coulomb']: for determinant in self.determinants[determinant_type]: self.pka_value += determinant.value @@ -523,13 +518,14 @@ class Group: back_bone += value side_chain = 0.0 for determinant in self.determinants['sidechain']: - if determinant.label[0:3] not in ['ASP', 'GLU', 'LYS', 'ARG', - 'HIS', 'CYS', 'TYR', 'C- ', - 'N+ ']: + if determinant.label[0:3] not in [ + 'ASP', 'GLU', 'LYS', 'ARG', 'HIS', 'CYS', 'TYR', 'C- ', + 'N+ ']: value = determinant.value side_chain += value - self.intrinsic_pka = self.model_pka + self.energy_volume \ - + self.energy_local + back_bone + side_chain + self.intrinsic_pka = ( + self.model_pka + self.energy_volume + self.energy_local + + back_bone + side_chain) def get_summary_string(self, remove_penalised_group=False): """Create summary string for this group. @@ -546,13 +542,13 @@ class Group: ligand_type = self.type penalty = '' if self.coupled_titrating_group: - penalty = ' NB: Discarded due to coupling with %s' \ - % self.coupled_titrating_group.label - str_ = " %9s %8.2lf %10.2lf %18s %s\n" % (self.label, - self.pka_value, - self.model_pka, - ligand_type, - penalty) + penalty = ( + ' NB: Discarded due to coupling with %s' + % self.coupled_titrating_group.label) + str_ = ( + " %9s %8.2lf %10.2lf %18s %s\n" + % (self.label, self.pka_value, self.model_pka, ligand_type, + penalty)) return str_ def __str__(self): @@ -621,8 +617,8 @@ class Group: titratable and are in that list are included; otherwise all titratable residues and CYS residues are included. """ - return self.titratable or (self.residue_type == 'CYS' and \ - not self.exclude_cys_from_results) + return self.titratable or ( + self.residue_type == 'CYS' and not self.exclude_cys_from_results) class COOGroup(Group): @@ -786,7 +782,8 @@ class CtermGroup(Group): """C-terminus group.""" def __init__(self, atom): Group.__init__(self, atom) - self.type = 'COO' # this is to deal with the COO-C- parameter unification. + # this is to deal with the COO-C- parameter unification. + self.type = 'COO' def setup_atoms(self): """Set up atoms in group.""" @@ -820,7 +817,8 @@ class BBNGroup(Group): the_hydrogen = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) class BBCGroup(Group): @@ -859,7 +857,8 @@ class NARGroup(Group): the_hydrogen = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) class NAMGroup(Group): @@ -881,7 +880,8 @@ class NAMGroup(Group): the_hydrogen = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) class FGroup(Group): @@ -920,7 +920,8 @@ class OHGroup(Group): the_hydrogen = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) class OPGroup(Group): @@ -938,7 +939,6 @@ class OPGroup(Group): PROTONATOR.protonate_atom(self.atom) # set the center using the oxygen self.set_center([self.atom]) - #self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) self.set_interaction_atoms([self.atom], [self.atom]) @@ -997,7 +997,8 @@ class CGGroup(Group): for nitrogen in the_nitrogens: PROTONATOR.protonate_atom(nitrogen) the_hydrogens += nitrogen.get_bonded_elements('H') - self.set_interaction_atoms(the_hydrogens+the_nitrogens, the_nitrogens) + self.set_interaction_atoms( + the_hydrogens+the_nitrogens, the_nitrogens) class C2NGroup(Group): @@ -1013,8 +1014,8 @@ class C2NGroup(Group): """Set up atoms in this group.""" # Identify the nitrogens the_nitrogens = self.atom.get_bonded_elements('N') - the_nitrogens = [n for n in the_nitrogens \ - if len(n.get_bonded_heavy_atoms()) == 1] + the_nitrogens = [ + n for n in the_nitrogens if len(n.get_bonded_heavy_atoms()) == 1] # set the center using the nitrogen self.set_center([self.atom]) the_hydrogens = [] @@ -1198,12 +1199,14 @@ class TitratableLigandGroup(Group): self.type = 'BLG' self.residue_type = 'BLG' else: - raise Exception('Unable to determine type of ligand group - charge not set?') + raise Exception('Unable to determine type of ligand group - ' + 'charge not set?') # check if marvin model pka has been calculated # this is not true if we are reading an input file if atom.marvin_pka: self.model_pka = atom.marvin_pka - info('Titratable ligand group ', atom, self.model_pka, self.charge) + info('Titratable ligand group ', + atom, self.model_pka, self.charge) self.model_pka_set = True @@ -1233,8 +1236,8 @@ def is_group(parameters, atom): elif parameters.ligand_typing == 'groups': ligand_group = is_ligand_group_by_groups(parameters, atom) else: - errstr = 'Unknown ligand typing method \'%s\'' % parameters.ligand_typing - raise Exception(errstr) + raise Exception('Unknown ligand typing method \'%s\'' + % parameters.ligand_typing) if ligand_group: return ligand_group return None @@ -1314,10 +1317,13 @@ def is_ligand_group_by_groups(_, atom): if atom.sybyl_type == 'C.2': # Guadinium and amidinium groups bonded_nitrogens = atom.get_bonded_elements('N') - npls = [n for n in bonded_nitrogens if (n.sybyl_type == 'N.pl3' \ - and len(n.get_bonded_heavy_atoms()) == 1)] + npls = [ + n for n in bonded_nitrogens + if (n.sybyl_type == 'N.pl3' + and len(n.get_bonded_heavy_atoms()) == 1)] if len(npls) == 2: - n_with_max_two_heavy_atom_bonds = [n for n in bonded_nitrogens \ + n_with_max_two_heavy_atom_bonds = [ + n for n in bonded_nitrogens if len(n.get_bonded_heavy_atoms()) < 3] if len(n_with_max_two_heavy_atom_bonds) == 2: return C2NGroup(atom) @@ -1369,17 +1375,18 @@ def is_ligand_group_by_marvin_pkas(parameters, atom): # TODO - double-check testing coverage of these functions. if not atom.conformation_container.marvin_pkas_calculated: lpka = LigandPkaValues(parameters) - lpka.get_marvin_pkas_for_molecular_container(atom.molecular_container, - min_ph=parameters.min_ligand_model_pka, - max_ph=parameters.max_ligand_model_pka) + lpka.get_marvin_pkas_for_molecular_container( + atom.molecular_container, min_ph=parameters.min_ligand_model_pka, + max_ph=parameters.max_ligand_model_pka) if atom.marvin_pka: return TitratableLigandGroup(atom) - # Special case of oxygen in carboxyl group not assigned a pka value by marvin + # Special case of oxygen in carboxyl group not assigned pka value by marvin if atom.sybyl_type == 'O.co2': atom.charge = -1.0 - other_oxygen = [o for o \ - in atom.get_bonded_elements('C')[0].get_bonded_elements('O') \ - if not o == atom][0] + other_oxygen = [ + o for o + in atom.get_bonded_elements('C')[0].get_bonded_elements('O') + if not o == atom][0] atom.marvin_pka = other_oxygen.marvin_pka return TitratableLigandGroup(atom) if atom.element in parameters.hydrogen_bonds.elements: diff --git a/propka/hybrid36.py b/propka/hybrid36.py index 67f2c0c..8acf0c1 100644 --- a/propka/hybrid36.py +++ b/propka/hybrid36.py @@ -1,4 +1,4 @@ -"""Provides an alternative PDB format that can transparently encode larger atom numbers. +"""Provides alternative PDB format that can encode larger atom numbers. http://cci.lbl.gov/hybrid_36/ """ From e5dedc1786f992e777a68dc4c271204196ca9cbe Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 18:55:02 -0700 Subject: [PATCH 42/65] De-lint iterative.py See https://github.com/jensengroup/propka-3.1/issues/43. --- propka/iterative.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/propka/iterative.py b/propka/iterative.py index d14d86f..5bafc8e 100644 --- a/propka/iterative.py +++ b/propka/iterative.py @@ -10,12 +10,14 @@ from propka.lib import info, debug UNK_MIN_VALUE = 0.005 -def add_to_determinant_list(group1, group2, distance, iterative_interactions, version): +def add_to_determinant_list(group1, group2, distance, iterative_interactions, + version): """Add iterative determinantes to the list. [[R1, R2], [side-chain, coulomb], [A1, A2]], ... - NOTE - the sign is determined when the interaction is added to the iterative object! + NOTE - sign is determined when the interaction is added to the iterative + object! NOTE - distance < coulomb_cutoff here Args: @@ -141,14 +143,18 @@ def add_iterative_ion_pair(object1, object2, interaction, version): q2 = object2.q comp1 = object1.pka_old + annihilation[0] + q1*coulomb_value comp2 = object2.pka_old + annihilation[1] + q2*coulomb_value - if object1.res_name not in version.parameters.exclude_sidechain_interactions: + if (object1.res_name + not in version.parameters.exclude_sidechain_interactions): comp1 += q1*hbond_value - if object2.res_name not in version.parameters.exclude_sidechain_interactions: + if (object2.res_name + not in version.parameters.exclude_sidechain_interactions): comp2 += q2*hbond_value if q1 == -1.0 and comp1 < comp2: - add_term = True # pKa(acid) < pKa(base) + # pKa(acid) < pKa(base) + add_term = True elif q1 == 1.0 and comp1 > comp2: - add_term = True # pKa(base) > pKa(acid) + # pKa(base) > pKa(acid) + add_term = True else: add_term = False annihilation[0] = 0.00 @@ -167,12 +173,14 @@ def add_iterative_ion_pair(object1, object2, interaction, version): # Side-chain if hbond_value > UNK_MIN_VALUE: # residue1 - if object1.res_name not in version.parameters.exclude_sidechain_interactions: + if (object1.res_name + not in version.parameters.exclude_sidechain_interactions): interaction = [object2, q1*hbond_value] annihilation[0] += -q1*hbond_value object1.determinants['sidechain'].append(interaction) # residue2 - if object2.res_name not in version.parameters.exclude_sidechain_interactions: + if (object2.res_name + not in version.parameters.exclude_sidechain_interactions): interaction = [object1, q2*hbond_value] annihilation[1] += -q2*hbond_value object2.determinants['sidechain'].append(interaction) @@ -191,7 +199,7 @@ def add_determinants(iterative_interactions, version, _=None): # --- setup --- iteratives = [] done_group = [] - # creating iterative objects with references to their real group counterparts + # create iterative objects with references to their real group counterparts for interaction in iterative_interactions: pair = interaction[0] for group in pair: @@ -354,8 +362,9 @@ class Iterative: return self.label == other.label else: # For heterogene atoms we also need to check the residue number - return self.label == other.label \ - and self.atom.res_num == other.atom.res_num + return ( + self.label == other.label + and self.atom.res_num == other.atom.res_num) def __hash__(self): """Needed to use objects in sets.""" From 524236600626216946b5c6f9730e6684b879bf2c Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 19:09:11 -0700 Subject: [PATCH 43/65] Clean up line breaks in lib.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/lib.py | 146 +++++++++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 61 deletions(-) diff --git a/propka/lib.py b/propka/lib.py index 5ca270e..ab54850 100644 --- a/propka/lib.py +++ b/propka/lib.py @@ -157,7 +157,8 @@ def parse_res_string(res_str): try: chain, resnum_str = res_str.split(":") except ValueError: - raise ValueError("Invalid residue string (must contain 2 colon-separated values)") + raise ValueError("Invalid residue string (must contain 2 " + "colon-separated values)") try: resnum = int(resnum_str) except ValueError: @@ -185,73 +186,96 @@ def build_parser(parser=None): if parser is not None: group = parser.add_argument_group(title="PROPKA invoation options") else: - parser = argparse.ArgumentParser(description=("PROPKA predicts the pKa values of ionizable " - "groups in proteins and protein-ligand " - "complexes based in the 3D structure"), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser( + description=("PROPKA predicts the pKa values of ionizable " + "groups in proteins and protein-ligand " + "complexes based in the 3D structure"), + formatter_class=argparse.ArgumentDefaultsHelpFormatter) # This is duck-typing at its finest group = parser group.add_argument("input_pdb", help="read data from ") - group.add_argument("-f", "--file", action="append", dest="filenames", default=[], - help="read data from , i.e. is added to arguments") - group.add_argument("-r", "--reference", dest="reference", default="neutral", - help=("setting which reference to use for stability calculations " - "[neutral/low-pH]")) - group.add_argument("-c", "--chain", action="append", dest="chains", - help=('creating the protein with only a specified chain. Specify ' - '" " for chains without ID [all]')) - group.add_argument("-i", "--titrate_only", dest="titrate_only", - help=('Treat only the specified residues as titratable. Value should ' - 'be a comma-separated list of "chain:resnum" values; for example: ' - '-i "A:10,A:11"')) - group.add_argument("-t", "--thermophile", action="append", dest="thermophiles", - help=("defining a thermophile filename; usually used in " - "'alignment-mutations'")) - group.add_argument("-a", "--alignment", action="append", dest="alignment", - help=("alignment file connecting and " - "[.pir]")) - group.add_argument("-m", "--mutation", action="append", dest="mutations", - help=("specifying mutation labels which is used to modify " - " according to, e.g. N25R/N181D")) - group.add_argument("-v", "--version", dest="version_label", default="Jan15", - help="specifying the sub-version of propka [Jan15/Dec19]") - group.add_argument("-p", "--parameters", dest="parameters", - default=pkg_resources.resource_filename(__name__, "propka.cfg"), - help="set the parameter file [%(default)s]") + group.add_argument( + "-f", "--file", action="append", dest="filenames", default=[], + help="read data from , i.e. is added to arguments") + group.add_argument( + "-r", "--reference", dest="reference", default="neutral", + help=("setting which reference to use for stability calculations " + "[neutral/low-pH]")) + group.add_argument( + "-c", "--chain", action="append", dest="chains", + help=('creating the protein with only a specified chain. Specify ' + '" " for chains without ID [all]')) + group.add_argument( + "-i", "--titrate_only", dest="titrate_only", + help=('Treat only the specified residues as titratable. Value should ' + 'be a comma-separated list of "chain:resnum" values; for example: ' + '-i "A:10,A:11"')) + group.add_argument( + "-t", "--thermophile", action="append", dest="thermophiles", + help=("defining a thermophile filename; usually used in " + "'alignment-mutations'")) + group.add_argument( + "-a", "--alignment", action="append", dest="alignment", + help=("alignment file connecting and " + "[.pir]")) + group.add_argument( + "-m", "--mutation", action="append", dest="mutations", + help=("specifying mutation labels which is used to modify " + " according to, e.g. N25R/N181D")) + group.add_argument( + "-v", "--version", dest="version_label", default="Jan15", + help="specifying the sub-version of propka [Jan15/Dec19]") + group.add_argument( + "-p", "--parameters", dest="parameters", + default=pkg_resources.resource_filename(__name__, "propka.cfg"), + help="set the parameter file [%(default)s]") try: - group.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], - help="logging level verbosity", default="INFO") + group.add_argument( + "--log-level", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="logging level verbosity", default="INFO") except argparse.ArgumentError: # It is possible that --log-level has already been set by APBS pass - group.add_argument("-o", "--pH", dest="pH", type=float, default=7.0, - help="setting pH-value used in e.g. stability calculations [7.0]") - group.add_argument("-w", "--window", dest="window", nargs=3, type=float, - default=(0.0, 14.0, 1.0), - help=("setting the pH-window to show e.g. stability profiles " - "[0.0, 14.0, 1.0]")) - group.add_argument("-g", "--grid", dest="grid", nargs=3, type=float, - default=(0.0, 14.0, 0.1), - help=("setting the pH-grid to calculate e.g. stability " - "related properties [0.0, 14.0, 0.1]")) - group.add_argument("--mutator", dest="mutator", - help="setting approach for mutating [alignment/scwrl/jackal]") - group.add_argument("--mutator-option", dest="mutator_options", action="append", - help="setting property for mutator [e.g. type=\"side-chain\"]") - group.add_argument("-d", "--display-coupled-residues", dest="display_coupled_residues", - action="store_true", help=("Displays alternative pKa values due " - "to coupling of titratable groups")) - group.add_argument("-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file", - action="store_true", default=False, - help=("Reuses the ligand mol2 files allowing the user to alter " - "ligand bond orders")) - group.add_argument("-k", "--keep-protons", dest="keep_protons", action="store_true", - help="Keep protons in input file", default=False) - group.add_argument("-q", "--quiet", action="store_const", const="WARNING", - dest="log_level", help="suppress non-warning messages") - group.add_argument("--protonate-all", dest="protonate_all", action="store_true", - help="Protonate all atoms (will not influence pKa calculation)", - default=False) + group.add_argument( + "-o", "--pH", dest="pH", type=float, default=7.0, + help="setting pH-value used in e.g. stability calculations [7.0]") + group.add_argument( + "-w", "--window", dest="window", nargs=3, type=float, + default=(0.0, 14.0, 1.0), + help=("setting the pH-window to show e.g. stability profiles " + "[0.0, 14.0, 1.0]")) + group.add_argument( + "-g", "--grid", dest="grid", nargs=3, type=float, + default=(0.0, 14.0, 0.1), + help=("setting the pH-grid to calculate e.g. stability " + "related properties [0.0, 14.0, 0.1]")) + group.add_argument( + "--mutator", dest="mutator", + help="setting approach for mutating [alignment/scwrl/jackal]") + group.add_argument( + "--mutator-option", dest="mutator_options", action="append", + help="setting property for mutator [e.g. type=\"side-chain\"]") + group.add_argument( + "-d", "--display-coupled-residues", dest="display_coupled_residues", + action="store_true", + help=("Displays alternative pKa values due " + "to coupling of titratable groups")) + group.add_argument( + "-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file", + action="store_true", default=False, + help=("Reuses the ligand mol2 files allowing the user to alter " + "ligand bond orders")) + group.add_argument( + "-k", "--keep-protons", dest="keep_protons", action="store_true", + help="Keep protons in input file", default=False) + group.add_argument( + "-q", "--quiet", action="store_const", const="WARNING", + dest="log_level", help="suppress non-warning messages") + group.add_argument( + "--protonate-all", dest="protonate_all", action="store_true", + help="Protonate all atoms (will not influence pKa calculation)", + default=False) return parser From 72851b54980eab5b663ba013f201a660b0b8b1a3 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 19:21:56 -0700 Subject: [PATCH 44/65] Clean line breaks. See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/ligand_pka_values.py | 78 +++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index 7831cb2..ae38c3f 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -37,8 +37,10 @@ class LigandPkaValues: location of program """ path = os.environ.get('PATH').split(os.pathsep) - locs = [i for i in filter(lambda loc: os.access(loc, os.F_OK), \ - map(lambda dir: os.path.join(dir, program), path))] + locs = [ + i for i in filter(lambda loc: os.access(loc, os.F_OK), + map(lambda dir: os.path.join(dir, program), + path))] if len(locs) == 0: str_ = "'Error: Could not find %s." % program str_ += ' Please make sure that it is found in the path.' @@ -46,7 +48,8 @@ class LigandPkaValues: sys.exit(-1) return locs[0] - def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, max_ph=20): + def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, + max_ph=20): """Use Marvin executables to get pKas for a PDB file. Args: @@ -56,12 +59,11 @@ class LigandPkaValues: max_ph: maximum pH value """ molecule = propka.molecular_container.Molecular_container(pdbfile) - self.get_marvin_pkas_for_molecular_container(molecule, - num_pkas=num_pkas, - min_ph=min_ph, - max_ph=max_ph) + self.get_marvin_pkas_for_molecular_container( + molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) - def get_marvin_pkas_for_molecular_container(self, molecule, num_pkas=10, min_ph=-10, max_ph=20): + def get_marvin_pkas_for_molecular_container(self, molecule, num_pkas=10, + min_ph=-10, max_ph=20): """Use Marvin executables to calculate pKas for a molecular container. Args: @@ -72,13 +74,10 @@ class LigandPkaValues: """ for name in molecule.conformation_names: filename = '%s_%s' % (molecule.name, name) - self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], - name=filename, - reuse=molecule.\ - options.reuse_ligand_mol2_file, - num_pkas=num_pkas, - min_ph=min_ph, - max_ph=max_ph) + self.get_marvin_pkas_for_conformation_container( + molecule.conformations[name], name=filename, + reuse=molecule.options.reuse_ligand_mol2_file, + num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) def get_marvin_pkas_for_conformation_container(self, conformation, name='temp', reuse=False, @@ -95,10 +94,9 @@ class LigandPkaValues: max_ph: maximum pH value """ conformation.marvin_pkas_calculated = True - self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), - name=name, reuse=reuse, - num_pkas=num_pkas, min_ph=min_ph, - max_ph=max_ph) + self.get_marvin_pkas_for_atoms( + conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse, + num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, num_pkas=10, min_ph=-10, max_ph=20): @@ -116,9 +114,9 @@ class LigandPkaValues: molecules = propka.lib.split_atoms_into_molecules(atoms) for i, molecule in enumerate(molecules): filename = '%s_%d.mol2'%(name, i+1) - self.get_marvin_pkas_for_molecule(molecule, filename=filename, - reuse=reuse, num_pkas=num_pkas, - min_ph=min_ph, max_ph=max_ph) + self.get_marvin_pkas_for_molecule( + molecule, filename=filename, reuse=reuse, num_pkas=num_pkas, + min_ph=min_ph, max_ph=max_ph) def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', reuse=False, num_pkas=10, min_ph=-10, @@ -138,25 +136,28 @@ class LigandPkaValues: propka.pdb.write_mol2_for_atoms(atoms, filename) # check that we actually have a file to work with if not os.path.isfile(filename): - errstr = "Didn't find a user-modified file '%s' - generating one" \ - % filename + errstr = ("Didn't find a user-modified file '%s' - generating one" + % filename) warning(errstr) propka.pdb.write_mol2_for_atoms(atoms, filename) # Marvin calculate pKa values - options = 'pka -a %d -b %d --min %f --max %f -d large' % (num_pkas, - num_pkas, - min_ph, - max_ph) - (output, errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).communicate() + options = ('pka -a %d -b %d --min %f --max %f -d large' + % (num_pkas, num_pkas, min_ph, max_ph)) + (output, errors) = subprocess.Popen( + [self.cxcalc, filename]+options.split(), stdout=subprocess.PIPE, + stderr=subprocess.PIPE).communicate() if len(errors) > 0: - info('********************************************************************************************************') - info('* Warning: Marvin execution failed: *') + info('***********************************************************' + '*********************************************') + info('* Warning: Marvin execution failed: ' + ' *') info('* %-100s *' % errors) - info('* *') - info('* Please edit the ligand mol2 file and re-run PropKa with the -l option: %29s *' % filename) - info('********************************************************************************************************') + info('* ' + ' *') + info('* Please edit the ligand mol2 file and re-run PropKa with ' + 'the -l option: %29s *' % filename) + info('***********************************************************' + '*********************************************') sys.exit(-1) # extract calculated pkas indices, pkas, types = self.extract_pkas(output) @@ -182,8 +183,9 @@ class LigandPkaValues: tags = tags.split('\t') values = values.split('\t') # format values - types = [tags[i][0] for i in range(1, len(tags)-1) if len(values) > i \ - and values[i] != ''] + types = [ + tags[i][0] for i in range(1, len(tags)-1) + if len(values) > i and values[i] != ''] indices = [int(a)-1 for a in values[-1].split(',') if a != ''] values = [float(v.replace(',', '.')) for v in values[1:-1] if v != ''] if len(indices) != len(values) != len(types): From 2658f80906c27e396792536eb19910dce17509ed Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 19:33:07 -0700 Subject: [PATCH 45/65] Clean up line breaks in ligand.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/ligand.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/propka/ligand.py b/propka/ligand.py index 2be158d..3386793 100644 --- a/propka/ligand.py +++ b/propka/ligand.py @@ -123,20 +123,22 @@ def assign_sybyl_type(atom): n_atom = nitrogens[0] o_atom = oxygens[0] if c_atom and n_atom and o_atom: - # make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds - if not is_aromatic_ring(is_ring_member(n_atom)) \ - and len(n_atom.get_bonded_heavy_atoms()) == 2: + # make sure that the Nitrogen is not aromatic and that it has two + # heavy atom bonds + if (not is_aromatic_ring(is_ring_member(n_atom)) + and len(n_atom.get_bonded_heavy_atoms()) == 2): set_type(n_atom, 'N.am') set_type(c_atom, 'C.2') set_type(o_atom, 'O.2') return if atom.element == 'C': # check for carboxyl - if len(atom.bonded_atoms) == 3 and list(bonded_elements.values()).count('O') == 2: + if (len(atom.bonded_atoms) == 3 + and list(bonded_elements.values()).count('O') == 2): index1 = list(bonded_elements.values()).index('O') index2 = list(bonded_elements.values()).index('O', index1+1) - if len(atom.bonded_atoms[index1].bonded_atoms) == 1 \ - and len(atom.bonded_atoms[index2].bonded_atoms) == 1: + if (len(atom.bonded_atoms[index1].bonded_atoms) == 1 + and len(atom.bonded_atoms[index2].bonded_atoms) == 1): set_type(atom.bonded_atoms[index1], 'O.co2-') set_type(atom.bonded_atoms[index2], 'O.co2') set_type(atom, 'C.2') @@ -144,7 +146,8 @@ def assign_sybyl_type(atom): # sp carbon if len(atom.bonded_atoms) <= 2: for bonded_atom in atom.bonded_atoms: - if squared_distance(atom, bonded_atom) < MAX_C_TRIPLE_BOND_SQUARED: + if (squared_distance(atom, bonded_atom) + < MAX_C_TRIPLE_BOND_SQUARED): set_type(atom, 'C.1') set_type(bonded_atom, bonded_atom.element + '.1') if atom.sybyl_assigned: @@ -155,8 +158,8 @@ def assign_sybyl_type(atom): # check for N.pl3 for bonded_atom in atom.bonded_atoms: if bonded_atom.element == 'N': - if len(bonded_atom.bonded_atoms) < 3 \ - or is_planar(bonded_atom): + if (len(bonded_atom.bonded_atoms) < 3 + or is_planar(bonded_atom)): set_type(bonded_atom, 'N.pl3') return # sp3 carbon @@ -181,16 +184,18 @@ def assign_sybyl_type(atom): # check for carboxyl if atom.bonded_atoms[0].element == 'C': the_carbon = atom.bonded_atoms[0] - if len(the_carbon.bonded_atoms) == 3 \ - and the_carbon.count_bonded_elements('O') == 2: + if (len(the_carbon.bonded_atoms) == 3 + and the_carbon.count_bonded_elements('O') == 2): [oxy1, oxy2] = the_carbon.get_bonded_elements('O') - if len(oxy1.bonded_atoms) == 1 and len(oxy2.bonded_atoms) == 1: + if (len(oxy1.bonded_atoms) == 1 + and len(oxy2.bonded_atoms) == 1): set_type(oxy1, 'O.co2-') set_type(oxy2, 'O.co2') set_type(the_carbon, 'C.2') return # check for X=O - if squared_distance(atom, atom.bonded_atoms[0]) < MAX_C_DOUBLE_BOND_SQUARED: + if (squared_distance(atom, atom.bonded_atoms[0]) + < MAX_C_DOUBLE_BOND_SQUARED): set_type(atom, 'O.2') if atom.bonded_atoms[0].element == 'C': set_type(atom.bonded_atoms[0], 'C.2') @@ -262,8 +267,8 @@ def identify_ring(this_atom, original_atom, number, past_atoms): these_return_atoms = identify_ring(atom, original_atom, number, past_atoms) if len(these_return_atoms) > 0: - if len(return_atoms) > len(these_return_atoms) \ - or len(return_atoms) == 0: + if (len(return_atoms) > len(these_return_atoms) + or len(return_atoms) == 0): return_atoms = these_return_atoms return return_atoms From a27f054ac7f7d531007f6014811a349521a7f826 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 19:43:32 -0700 Subject: [PATCH 46/65] Clean up line breaks in molecular_container.py. --- propka/molecular_container.py | 79 ++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/propka/molecular_container.py b/propka/molecular_container.py index 3ef1478..0c97448 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -56,10 +56,11 @@ class Molecular_container: if input_file_extension[0:4] == '.pdb': # input is a pdb file. read in atoms and top up containers to make # sure that all atoms are present in all conformations - [self.conformations, self.conformation_names] \ - = propka.pdb.read_pdb(input_file, self.version.parameters, self) + [self.conformations, self.conformation_names] = ( + propka.pdb.read_pdb(input_file, self.version.parameters, self)) if len(self.conformations) == 0: - info('Error: The pdb file does not seems to contain any molecular conformations') + info('Error: The pdb file does not seems to contain any ' + 'molecular conformations') sys.exit(-1) self.top_up_conformations() # make a structure precheck @@ -79,9 +80,9 @@ class Molecular_container: propka.pdb.write_input(self, filename) elif input_file_extension == '.propka_input': #input is a propka_input file - [self.conformations, self.conformation_names] \ - = propka.pdb.read_input(input_file, self.version.parameters, - self) + [self.conformations, self.conformation_names] = ( + propka.pdb.read_input(input_file, self.version.parameters, + self)) # Extract groups - this merely sets up the groups found in the # input file self.extract_groups() @@ -94,8 +95,8 @@ class Molecular_container: def top_up_conformations(self): """Makes sure that all atoms are present in all conformations.""" for name in self.conformation_names: - if name != '1A' and (len(self.conformations[name]) \ - < len(self.conformations['1A'])): + if (name != '1A' and (len(self.conformations[name]) + < len(self.conformations['1A']))): self.conformations[name].top_up(self.conformations['1A']) def find_covalently_coupled_groups(self): @@ -109,7 +110,8 @@ class Molecular_container: info('-' * 103) verbose = self.options.display_coupled_residues for name in self.conformation_names: - self.conformations[name].find_non_covalently_coupled_groups(verbose=verbose) + self.conformations[name].find_non_covalently_coupled_groups( + verbose=verbose) def extract_groups(self): """Identify the groups needed for pKa calculation.""" @@ -125,7 +127,8 @@ class Molecular_container: """Calculate pKa values.""" # calculate for each conformation for name in self.conformation_names: - self.conformations[name].calculate_pka(self.version, self.options) + self.conformations[name].calculate_pka( + self.version, self.options) # find non-covalently coupled groups self.find_non_covalently_coupled_groups() # find the average of the conformations @@ -137,9 +140,8 @@ class Molecular_container: """Generate an average of conformations.""" parameters = self.conformations[self.conformation_names[0]].parameters # make a new configuration to hold the average values - avr_conformation = ConformationContainer(name='average', - parameters=parameters, - molecular_container=self) + avr_conformation = ConformationContainer( + name='average', parameters=parameters, molecular_container=self) container = self.conformations[self.conformation_names[0]] for group in container.get_groups_for_calculations(): # new group to hold average values @@ -150,8 +152,8 @@ class Molecular_container: if group_to_add: avr_group += group_to_add else: - str_ = 'Group %s could not be found in conformation %s.' \ - % (group.atom.residue_label, name) + str_ = ('Group %s could not be found in conformation %s.' + % (group.atom.residue_label, name)) warning(str_) # ... and store the average value avr_group = avr_group / len(self.conformation_names) @@ -161,7 +163,8 @@ class Molecular_container: self.conformations.values()))): avr_conformation.non_covalently_coupled_groups = True # store chain info - avr_conformation.chains = self.conformations[self.conformation_names[0]].chains + avr_conformation.chains = self.conformations[ + self.conformation_names[0]].chains self.conformations['AVR'] = avr_conformation def write_pka(self, filename=None, reference="neutral", @@ -180,12 +183,14 @@ class Molecular_container: # to an alternative pka file if self.options.display_coupled_residues: filename = os.path.join('%s_alt_state.pka' % (self.name)) - if hasattr(self.version.parameters, 'output_file_tag') \ - and len(self.version.parameters.output_file_tag) > 0: - filename = os.path.join('%s_%s.pka' % (self.name, - self.version.parameters.output_file_tag)) - propka.output.write_pka(self, self.version.parameters, filename=filename, - conformation='AVR', reference=reference) + if (hasattr(self.version.parameters, 'output_file_tag') + and len(self.version.parameters.output_file_tag) > 0): + filename = os.path.join( + '%s_%s.pka' % (self.name, + self.version.parameters.output_file_tag)) + propka.output.write_pka( + self, self.version.parameters, filename=filename, + conformation='AVR', reference=reference) def get_folding_profile(self, conformation='AVR', reference="neutral", grid=[0., 14., 0.1]): @@ -238,8 +243,8 @@ class Molecular_container: charge_profile = [] for ph in propka.lib.make_grid(*grid): conf = self.conformations[conformation] - q_unfolded, q_folded = conf.calculate_charge(self.version.parameters, - ph=ph) + q_unfolded, q_folded = conf.calculate_charge( + self.version.parameters, ph=ph) charge_profile.append([ph, q_unfolded, q_folded]) return charge_profile @@ -254,8 +259,8 @@ class Molecular_container: 1. Folded state PI 2. Unfolded state PI """ - charge_profile = self.get_charge_profile(conformation=conformation, - grid=grid) + charge_profile = self.get_charge_profile( + conformation=conformation, grid=grid) pi_folded = pi_unfolded = [None, 1e6, 1e6] for point in charge_profile: pi_folded = min(pi_folded, point, key=lambda v: abs(v[2])) @@ -266,16 +271,14 @@ class Molecular_container: pi_unfolded_value = pi_unfolded[0] step = grid[2] # TODO - need to warn if maximum number of iterations is exceeded - if (pi_folded[2] > UNK_PI_CUTOFF or pi_unfolded[1] > UNK_PI_CUTOFF) \ - and iteration < MAX_ITERATION: - pi_folded_value, _ = self.get_pi(conformation=conformation, - grid=[pi_folded[0]-step, - pi_folded[0]+step, - step/10.0], - iteration=iteration+1) - _, pi_unfolded_value = self.get_pi(conformation=conformation, - grid=[pi_unfolded[0]-step, - pi_unfolded[0]+step, - step/10.0], - iteration=iteration+1) + if ((pi_folded[2] > UNK_PI_CUTOFF + or pi_unfolded[1] > UNK_PI_CUTOFF) and iteration < MAX_ITERATION): + pi_folded_value, _ = self.get_pi( + conformation=conformation, + grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], + iteration=iteration+1) + _, pi_unfolded_value = self.get_pi( + conformation=conformation, + grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], + iteration=iteration+1) return pi_folded_value, pi_unfolded_value From c796ba6b4df6513b042fba1e0611998464d48a3a Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:00:10 -0700 Subject: [PATCH 47/65] Clean up line breaks in output.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/output.py | 156 +++++++++++++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 61 deletions(-) diff --git a/propka/output.py b/propka/output.py index 85f807e..be0e353 100644 --- a/propka/output.py +++ b/propka/output.py @@ -82,9 +82,9 @@ def write_pka(protein, parameters, filename=None, conformation='1A', str_ += get_summary_section(protein, conformation, parameters) str_ += "%s\n" % get_the_line() # printing Folding Profile - str_ += get_folding_profile_section(protein, conformation=conformation, - reference=reference, - window=[0., 14., 1.0]) + str_ += get_folding_profile_section( + protein, conformation=conformation, reference=reference, + window=[0., 14., 1.0]) # printing Protein Charge Profile str_ += get_charge_profile_section(protein, conformation=conformation) # now, writing the pka text to file @@ -116,8 +116,9 @@ def print_tm_profile(protein, reference="neutral", window=[0., 14., 1.], else: str_ = " suggested Tm-profile for %s\n" % (protein.name) for (ph, tm_) in profile: - if ph >= window[0] and ph <= window[1] and (ph%window[2] < 0.01 \ - or ph%window[2] > 0.99*window[2]): + if (ph >= window[0] and ph <= window[1] + and (ph%window[2] < 0.01 + or ph%window[2] > 0.99*window[2])): str_ += "%6.2lf%10.2lf\n" % (ph, tm_) info(str_) @@ -163,15 +164,16 @@ def get_determinant_section(protein, conformation, parameters): # printing determinants for chain in protein.conformations[conformation].chains: for residue_type in parameters.write_out_order: - groups = [g for g in protein.conformations[conformation].groups \ + groups = [ + g for g in protein.conformations[conformation].groups if g.atom.chain_id == chain] for group in groups: if group.residue_type == residue_type: - str_ += "%s" \ - % group.get_determinant_string(parameters.remove_penalised_group) + str_ += "%s" % group.get_determinant_string( + parameters.remove_penalised_group) # Add a warning in case of coupled residues - if protein.conformations[conformation].non_covalently_coupled_groups \ - and not protein.options.display_coupled_residues: + if (protein.conformations[conformation].non_covalently_coupled_groups + and not protein.options.display_coupled_residues): str_ += 'Coupled residues (marked *) were detected.' str_ += 'Please rerun PropKa with the --display-coupled-residues \n' str_ += 'or -d option for detailed information.\n' @@ -193,8 +195,8 @@ def get_summary_section(protein, conformation, parameters): for residue_type in parameters.write_out_order: for group in protein.conformations[conformation].groups: if group.residue_type == residue_type: - str_ += "%s" \ - % group.get_summary_string(parameters.remove_penalised_group) + str_ += "%s" % group.get_summary_string( + parameters.remove_penalised_group) return str_ @@ -219,9 +221,10 @@ def get_folding_profile_section(protein, conformation='AVR', str_ += "\n" str_ += "Free energy of %9s (kcal/mol) as a function" % direction str_ += " of pH (using %s reference)\n" % reference - profile, [ph_opt, dg_opt], [dg_min, dg_max], [ph_min, ph_max] \ - = protein.get_folding_profile(conformation=conformation, - reference=reference, grid=[0., 14., 0.1]) + profile, [ph_opt, dg_opt], [dg_min, dg_max], [ph_min, ph_max] = ( + protein.get_folding_profile( + conformation=conformation, reference=reference, + grid=[0., 14., 0.1])) if profile is None: str_ += "Could not determine folding profile\n" else: @@ -234,7 +237,8 @@ def get_folding_profile_section(protein, conformation='AVR', str_ += "Could not determine pH optimum\n" else: str_ += "The pH of optimum stability is %4.1lf" % ph_opt - str_ += " for which the free energy is %6.1lf kcal/mol at 298K\n" % dg_opt + str_ += (" for which the free energy is %6.1lf kcal/mol at 298K\n" + % dg_opt) if dg_min is None or dg_max is None: str_ += "Could not determine pH values where the free energy" str_ += " is within 80 %s of minimum\n" % ("%") @@ -273,8 +277,7 @@ def get_charge_profile_section(protein, conformation='AVR', _=None): if pi_pro is None or pi_mod is None: str_ += "Could not determine the pI\n\n" else: - str_ += "The pI is %5.2lf (folded) and %5.2lf (unfolded)\n" % (pi_pro, - pi_mod) + str_ += ("The pI is %5.2lf (folded) and %5.2lf (unfolded)\n") return str_ @@ -311,23 +314,39 @@ def get_propka_header(): """ today = date.today() str_ = "propka3.1 %93s\n" % (today) - str_ += "-------------------------------------------------------------------------------------------------------\n" - str_ += "-- --\n" - str_ += "-- PROPKA: A PROTEIN PKA PREDICTOR --\n" - str_ += "-- --\n" - str_ += "-- VERSION 1.0, 04/25/2004, IOWA CITY --\n" - str_ += "-- BY HUI LI --\n" - str_ += "-- --\n" - str_ += "-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --\n" - str_ += "-- BY DELPHINE C. BAS AND DAVID M. ROGERS --\n" - str_ += "-- --\n" - str_ += "-- VERSION 3.0, 01/06/2011, COPENHAGEN --\n" - str_ += "-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --\n" - str_ += "-- --\n" - str_ += "-- VERSION 3.1, 07/01/2011, COPENHAGEN --\n" - str_ += "-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --\n" - str_ += "-------------------------------------------------------------------------------------------------------\n" - str_ += "\n" + str_ += ("---------------------------------------------------------------" + "----------------------------------------\n") + str_ += ("-- " + " --\n") + str_ += ("-- PROPKA: A PROTEIN PKA " + "PREDICTOR --\n") + str_ += ("-- " + " --\n") + str_ += ("-- VERSION 1.0, 04/25/2004, " + "IOWA CITY --\n") + str_ += ("-- BY HUI LI " + " --\n") + str_ += ("-- " + " --\n") + str_ += ("-- VERSION 2.0, 11/05/2007, IOWA " + "CITY/COPENHAGEN --\n") + str_ += ("-- BY DELPHINE C. BAS AND DAVID " + "M. ROGERS --\n") + str_ += ("-- " + " --\n") + str_ += ("-- VERSION 3.0, 01/06/2011, " + "COPENHAGEN --\n") + str_ += ("-- BY MATS H.M. OLSSON AND CHRESTEN " + "R. SONDERGARD --\n") + str_ += ("-- " + " --\n") + str_ += ("-- VERSION 3.1, 07/01/2011, " + "COPENHAGEN --\n") + str_ += ("-- BY CHRESTEN R. SONDERGARD AND " + "MATS H.M. OLSSON --\n") + str_ += ("---------------------------------------------------------------" + "----------------------------------------\n") + str_ += ("\n") return str_ @@ -338,27 +357,38 @@ def get_references_header(): string """ str_ = "" - str_ += "-------------------------------------------------------------------------------------------------------\n" - str_ += " References:\n" - str_ += "\n" - str_ += " Very Fast Empirical Prediction and Rationalization of Protein pKa Values\n" - str_ += " Hui Li, Andrew D. Robertson and Jan H. Jensen\n" - str_ += " PROTEINS: Structure, Function, and Bioinformatics 61:704-721 (2005)\n" - str_ += " \n" - str_ += " Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand Complexes\n" - str_ += " Delphine C. Bas, David M. Rogers and Jan H. Jensen\n" - str_ += " PROTEINS: Structure, Function, and Bioinformatics 73:765-783 (2008)\n" - str_ += " \n" - str_ += " PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical pKa predictions\n" - str_ += " Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, and Jan H. Jensen\n" - str_ += " Journal of Chemical Theory and Computation, 7(2):525-537 (2011)\n" - str_ += " \n" - str_ += " Improved Treatment of Ligands and Coupling Effects in Empirical Calculation\n" - str_ += " and Rationalization of pKa Values\n" - str_ += " Chresten R. Sondergaard, Mats H.M. Olsson, Michal Rostkowski, and Jan H. Jensen\n" - str_ += " Journal of Chemical Theory and Computation, (2011)\n" - str_ += " \n" - str_ += "-------------------------------------------------------------------------------------------------------\n" + str_ += ("---------------------------------------------------------------" + "----------------------------------------\n") + str_ += (" References:\n") + str_ += ("\n") + str_ += (" Very Fast Empirical Prediction and Rationalization of " + "Protein pKa Values\n") + str_ += (" Hui Li, Andrew D. Robertson and Jan H. Jensen\n") + str_ += (" PROTEINS: Structure, Function, and Bioinformatics 61:704-721" + " (2005)\n") + str_ += (" \n") + str_ += (" Very Fast Prediction and Rationalization of pKa Values for " + "Protein-Ligand Complexes\n") + str_ += (" Delphine C. Bas, David M. Rogers and Jan H. Jensen\n") + str_ += (" PROTEINS: Structure, Function, and Bioinformatics 73:765-" + "783 (2008)\n") + str_ += (" \n") + str_ += (" PROPKA3: Consistent Treatment of Internal and Surface " + "Residues in Empirical pKa predictions\n") + str_ += (" Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, " + "and Jan H. Jensen\n") + str_ += (" Journal of Chemical Theory and Computation, 7(2):525-537 " + "(2011)\n") + str_ += (" \n") + str_ += (" Improved Treatment of Ligands and Coupling Effects in " + "Empirical Calculation\n") + str_ += (" and Rationalization of pKa Values\n") + str_ += (" Chresten R. Sondergaard, Mats H.M. Olsson, Michal " + "Rostkowski, and Jan H. Jensen\n") + str_ += (" Journal of Chemical Theory and Computation, (2011)\n") + str_ += (" \n") + str_ += ("--------------------------------------------------------------" + "-----------------------------------------\n") return str_ @@ -381,10 +411,14 @@ def get_determinants_header(): string """ str_ = "" - str_ += "--------- ----- ------ --------------------- -------------- -------------- --------------\n" - str_ += " DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC \n" - str_ += " RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION \n" - str_ += "--------- ----- ------ --------- --------- -------------- -------------- --------------\n" + str_ += ("--------- ----- ------ --------------------- " + "-------------- -------------- --------------\n") + str_ += (" DESOLVATION EFFECTS " + "SIDECHAIN BACKBONE COULOMBIC \n") + str_ += (" RESIDUE pKa BURIED REGULAR RE " + "HYDROGEN BOND HYDROGEN BOND INTERACTION \n") + str_ += ("--------- ----- ------ --------- --------- " + "-------------- -------------- --------------\n") return str_ @@ -402,7 +436,7 @@ def get_summary_header(): def get_the_line(): - """Draw the line - Johnny Cash would have been proud - or actually Aerosmith! + """Draw the line-Johnny Cash would have been proud-or actually Aerosmith! NOTE - Johnny Cash walked the line. From baed55e79acc66162abdb751c7353b4285f3443a Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:12:47 -0700 Subject: [PATCH 48/65] Clean up line breaks in parameters.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/parameters.py | 142 ++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 64 deletions(-) diff --git a/propka/parameters.py b/propka/parameters.py index 3b6a886..9210081 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -7,27 +7,28 @@ from propka.lib import info, warning # names and types of all key words in configuration file MATRICES = ['interaction_matrix'] PAIR_WISE_MATRICES = ['sidechain_cutoffs'] -NUMBER_DICTIONARIES = ['VanDerWaalsVolume', 'charge', 'model_pkas', 'ions', - 'valence_electrons', 'custom_model_pkas'] +NUMBER_DICTIONARIES = [ + 'VanDerWaalsVolume', 'charge', 'model_pkas', 'ions', 'valence_electrons', + 'custom_model_pkas'] LIST_DICTIONARIES = ['backbone_NH_hydrogen_bond', 'backbone_CO_hydrogen_bond'] STRING_DICTIONARIES = ['protein_group_mapping'] -STRING_LISTS = ['ignore_residues', 'angular_dependent_sidechain_interactions', - 'acid_list', 'base_list', 'exclude_sidechain_interactions', - 'backbone_reorganisation_list', 'write_out_order'] +STRING_LISTS = [ + 'ignore_residues', 'angular_dependent_sidechain_interactions', + 'acid_list', 'base_list', 'exclude_sidechain_interactions', + 'backbone_reorganisation_list', 'write_out_order'] DISTANCES = ['desolv_cutoff', 'buried_cutoff', 'coulomb_cutoff1', 'coulomb_cutoff2'] -PARAMETERS = ['Nmin', 'Nmax', 'desolvationSurfaceScalingFactor', - 'desolvationPrefactor', 'desolvationAllowance', 'coulomb_diel', - 'COO_HIS_exception', 'OCO_HIS_exception', 'CYS_HIS_exception', - 'CYS_CYS_exception', 'min_ligand_model_pka', - 'max_ligand_model_pka', 'include_H_in_interactions', - 'coupling_max_number_of_bonds', - 'min_bond_distance_for_hydrogen_bonds', 'coupling_penalty', - 'shared_determinants', 'common_charge_centre', - 'hide_penalised_group', 'remove_penalised_group', - 'max_intrinsic_pka_diff', 'min_interaction_energy', - 'max_free_energy_diff', 'min_swap_pka_shift', 'min_pka', - 'max_pka', 'sidechain_interaction'] +PARAMETERS = [ + 'Nmin', 'Nmax', 'desolvationSurfaceScalingFactor', 'desolvationPrefactor', + 'desolvationAllowance', 'coulomb_diel', 'COO_HIS_exception', + 'OCO_HIS_exception', 'CYS_HIS_exception', 'CYS_CYS_exception', + 'min_ligand_model_pka', 'max_ligand_model_pka', + 'include_H_in_interactions', 'coupling_max_number_of_bonds', + 'min_bond_distance_for_hydrogen_bonds', 'coupling_penalty', + 'shared_determinants', 'common_charge_centre', 'hide_penalised_group', + 'remove_penalised_group', 'max_intrinsic_pka_diff', + 'min_interaction_energy', 'max_free_energy_diff', 'min_swap_pka_shift', + 'min_pka', 'max_pka', 'sidechain_interaction'] STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference'] @@ -44,7 +45,7 @@ class Parameters: self.model_pkas = {} self.interaction_matrix = InteractionMatrix("interaction_matrix") self.sidechain_cutoffs = None - # TODO - it would be nice to rename these but they're defined everywhere + # TODO - it would be nice to rename these; they're defined everywhere self.COO_HIS_exception = None self.OCO_HIS_exception = None self.CYS_HIS_exception = None @@ -184,10 +185,11 @@ class Parameters: def set_up_data_structures(self): """Set up internal data structures. - TODO - it would be better to make these assignments explicit in __init__. + TODO - it would be better to make these assignments explicit in + __init__. """ - for key_word in NUMBER_DICTIONARIES + LIST_DICTIONARIES \ - + STRING_DICTIONARIES: + for key_word in (NUMBER_DICTIONARIES + LIST_DICTIONARIES + + STRING_DICTIONARIES): setattr(self, key_word, {}) for key_word in STRING_LISTS: setattr(self, key_word, []) @@ -208,44 +210,52 @@ class Parameters: info('') info('--------------- Interactions --------------------------') - agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', - 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', - 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', - 'N1', 'O2', 'OP', 'SH'] - lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', - 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - map_ = {'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'], - 'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'], - 'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'], - 'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [], - 'NAM': ['AMD'], 'N1': [], 'O2': []} + agroups = [ + 'COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', + 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', + 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + lgroups = [ + 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', + 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + map_ = { + 'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'], + 'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'], + 'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'], + 'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [], + 'NAM': ['AMD'], 'N1': [], 'O2': []} for group1 in agroups: for group2 in lgroups: - interaction = '%3s %3s %1s %4s %4s' \ - % (group1, group2, self.interaction_matrix[group1][group2], \ - self.sidechain_cutoffs.get_value(group1, group2)[0], \ - self.sidechain_cutoffs.get_value(group1, group2)[1]) + interaction = '%3s %3s %1s %4s %4s' % ( + group1, group2, self.interaction_matrix[group1][group2], + self.sidechain_cutoffs.get_value(group1, group2)[0], + self.sidechain_cutoffs.get_value(group1, group2)[1]) map_interaction = '' if group2 in map_: for val in map_[group2]: - map_interaction += '|%3s %3s %1s %4s %4s' \ - % (group1, val, \ - self.interaction_matrix[group1][val], \ - self.sidechain_cutoffs.get_value(group1, val)[0], \ - self.sidechain_cutoffs.get_value(group1, val)[1]) - if self.interaction_matrix[group1][val] \ - != self.interaction_matrix[group1][group2]: + map_interaction += '|%3s %3s %1s %4s %4s' % ( + group1, val, self.interaction_matrix[group1][val], + self.sidechain_cutoffs.get_value(group1, val)[0], + self.sidechain_cutoffs.get_value(group1, val)[1]) + if (self.interaction_matrix[group1][val] + != self.interaction_matrix[group1][group2]): map_interaction += '* ' - if self.sidechain_cutoffs.get_value(group1, val)[0] \ - != self.sidechain_cutoffs.get_value(group1, group2)[0] \ - or self.sidechain_cutoffs.get_value(group1, val)[1] \ - != self.sidechain_cutoffs.get_value(group1, group2)[1]: + if (self.sidechain_cutoffs.get_value(group1, val)[0] + != self.sidechain_cutoffs.get_value(group1, + group2)[0] + or self.sidechain_cutoffs.get_value(group1, + val)[1] + != self.sidechain_cutoffs.get_value(group1, + group2)[1]): map_interaction += '! ' else: map_interaction += ' ' - if len(map_[group2]) == 0 \ - and (self.sidechain_cutoffs.get_value(group1, group2)[0] \ - != 3 or self.sidechain_cutoffs.get_value(group1, group2)[1] != 4): + if (len(map_[group2]) == 0 + and (self.sidechain_cutoffs.get_value(group1, + group2)[0] + != 3 + or self.sidechain_cutoffs.get_value(group1, + group2)[1] + != 4)): map_interaction += '? ' info(interaction, map_interaction) if group1 == group2: @@ -321,14 +331,16 @@ Group1 & Group2 & Interaction & c1 &c2 \\\\ for group2 in lgroups: if self.interaction_matrix[group1][group2] == '-': continue - if self.sidechain_cutoffs.get_value(group1, group2) \ - == self.sidechain_cutoffs.default: + if (self.sidechain_cutoffs.get_value(group1, group2) + == self.sidechain_cutoffs.default): continue - str_ += '%3s & %3s & %1s & %4s & %4s\\\\ \n'\ - % (group1, group2, \ - self.interaction_matrix[group1][group2], \ - self.sidechain_cutoffs.get_value(group1, group2)[0], \ - self.sidechain_cutoffs.get_value(group1, group2)[1]) + str_ += ('%3s & %3s & %1s & %4s & %4s\\\\ \n' + % (group1, group2, + self.interaction_matrix[group1][group2], + self.sidechain_cutoffs.get_value(group1, + group2)[0], + self.sidechain_cutoffs.get_value(group1, + group2)[1])) if group1 == group2: break str_ += ' \\end{longtable}\n' @@ -367,11 +379,13 @@ Group1 & Group2 & Interaction & c1 &c2 \\\\ """ % ('l'*len(agroups), self.sidechain_cutoffs.default) for group1 in agroups: for group2 in agroups: - str_ += '%3s & %3s & %1s & %4s & %4s\\\\ \n' \ - % (group1, group2, \ - self.interaction_matrix[group1][group2], \ - self.sidechain_cutoffs.get_value(group1, group2)[0], \ - self.sidechain_cutoffs.get_value(group1, group2)[1]) + str_ += ('%3s & %3s & %1s & %4s & %4s\\\\ \n' + % (group1, group2, + self.interaction_matrix[group1][group2], + self.sidechain_cutoffs.get_value( + group1, group2)[0], + self.sidechain_cutoffs.get_value( + group1, group2)[1])) if group1 == group2: break str_ += ' \\end{longtable}\n' @@ -499,8 +513,8 @@ class PairwiseMatrix: """ if key1 in self.dictionary and key2 in self.dictionary[key1]: if key1 != key2: - str_ = 'Parameter value for %s, %s defined more than once' \ - % (key1, key2) + str_ = ('Parameter value for %s, %s defined more than once' + % (key1, key2)) warning(str_) if not key1 in self.dictionary: self.dictionary[key1] = {} From a6016ed8277b8d72774771cb2ba645099214eec5 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:22:28 -0700 Subject: [PATCH 49/65] Clean up line breaks for pdb.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/pdb.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/propka/pdb.py b/propka/pdb.py index 6d76839..7d33299 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -25,15 +25,14 @@ def read_pdb(pdb_file, parameters, molecule): """ conformations = {} # read in all atoms in the file - lines = get_atom_lines_from_pdb(pdb_file, - ignore_residues=parameters.ignore_residues, - keep_protons=molecule.options.keep_protons, - chains=molecule.options.chains) + lines = get_atom_lines_from_pdb( + pdb_file, ignore_residues=parameters.ignore_residues, + keep_protons=molecule.options.keep_protons, + chains=molecule.options.chains) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = ConformationContainer(name=name, - parameters=parameters, - molecular_container=molecule) + conformations[name] = ConformationContainer( + name=name, parameters=parameters, molecular_container=molecule) conformations[name].add_atom(atom) # make a sorted list of conformation names names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) @@ -150,7 +149,8 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, # and yield the atom atom = Atom(line=line) atom.terminal = terminal - if not (atom.element == 'H' and not keep_protons): #ignore hydrogen + #ignore hydrogen + if not (atom.element == 'H' and not keep_protons): yield (conformation, atom) terminal = None @@ -255,10 +255,12 @@ def write_input(molecular_container, filename): for atom in molecular_container.conformations[conformation_name].atoms: out.write(atom.make_conect_line()) # write covalently coupled groups - for group in molecular_container.conformations[conformation_name].groups: + for group in ( + molecular_container.conformations[conformation_name].groups): out.write(group.make_covalently_coupled_line()) # write non-covalently coupled groups - for group in molecular_container.conformations[conformation_name].groups: + for group in ( + molecular_container.conformations[conformation_name].groups): out.write(group.make_non_covalently_coupled_line()) out.write('ENDMDL\n') out.close() From 0ac800333d98468823dc8f9ae131e40639b744ce Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:25:15 -0700 Subject: [PATCH 50/65] Clean up line breaks in protonate.py. See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/protonate.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/propka/protonate.py b/propka/protonate.py index e3d96ff..8218176 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -16,23 +16,23 @@ class Protonate: verbose: True for verbose output """ self.verbose = verbose - self.valence_electrons = {'H': 1, 'He': 2, 'Li': 1, 'Be': 2, 'B': 3, - 'C': 4, 'N': 5, 'O': 6, 'F': 7, 'Ne': 8, - 'Na': 1, 'Mg': 2, 'Al': 3, 'Si': 4, 'P': 5, - 'S': 6, 'Cl': 7, 'Ar': 8, 'K': 1, 'Ca': 2, - 'Sc': 2, 'Ti': 2, 'Va': 2, 'Cr': 1, 'Mn': 2, - 'Fe': 2, 'Co': 2, 'Ni': 2, 'Cu': 1, 'Zn': 2, - 'Ga': 3, 'Ge': 4, 'As': 5, 'Se': 6, 'Br': 7, - 'Kr': 8, 'I': 7} + self.valence_electrons = { + 'H': 1, 'He': 2, 'Li': 1, 'Be': 2, 'B': 3, 'C': 4, 'N': 5, + 'O': 6, 'F': 7, 'Ne': 8, 'Na': 1, 'Mg': 2, 'Al': 3, 'Si': 4, + 'P': 5, 'S': 6, 'Cl': 7, 'Ar': 8, 'K': 1, 'Ca': 2, 'Sc': 2, + 'Ti': 2, 'Va': 2, 'Cr': 1, 'Mn': 2, 'Fe': 2, 'Co': 2, 'Ni': 2, + 'Cu': 1, 'Zn': 2, 'Ga': 3, 'Ge': 4, 'As': 5, 'Se': 6, 'Br': 7, + 'Kr': 8, 'I': 7} # TODO - consider putting charges in a configuration file - self.standard_charges = {'ARG-NH1': 1.0, 'ASP-OD2': -1.0, - 'GLU-OE2': -1.0, 'HIS-ND1': 1.0, - 'LYS-NZ': 1.0, 'N+': 1.0, 'C-': -1.0} - self.sybyl_charges = {'N.pl3': 1, 'N.3': 1, 'N.4': 1, 'N.ar': 1, - 'O.co2-': 1} + self.standard_charges = { + 'ARG-NH1': 1.0, 'ASP-OD2': -1.0, 'GLU-OE2': -1.0, 'HIS-ND1': 1.0, + 'LYS-NZ': 1.0, 'N+': 1.0, 'C-': -1.0} + self.sybyl_charges = { + 'N.pl3': 1, 'N.3': 1, 'N.4': 1, 'N.ar': 1, 'O.co2-': 1} # TODO - consider putting bond lengths in a configuration file - self.bond_lengths = {'C': 1.09, 'N': 1.01, 'O': 0.96, 'F': 0.92, - 'Cl': 1.27, 'Br': 1.41, 'I': 1.61, 'S': 1.35} + self.bond_lengths = { + 'C': 1.09, 'N': 1.01, 'O': 0.96, 'F': 0.92, 'Cl': 1.27, + 'Br': 1.41, 'I': 1.61, 'S': 1.35} self.protonation_methods = {4: self.tetrahedral, 3: self.trigonal} def protonate(self, molecules): @@ -217,7 +217,8 @@ class Protonate: # use other atoms bonded to the neighbour to establish the # plane, if possible other_atom_indices = [] - for i, bonded_atom in enumerate(atom.bonded_atoms[0].bonded_atoms): + for i, bonded_atom in enumerate( + atom.bonded_atoms[0].bonded_atoms): if bonded_atom != atom: other_atom_indices.append(i) vec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]) @@ -333,9 +334,8 @@ class Protonate: i = 1 for proton in atom.get_bonded_elements('H'): proton.name = 'H%s%d' % (atom.name[1:], i) - proton.residue_label = "%-3s%4d%2s" % (proton.name, - proton.res_num, - proton.chain_id) + proton.residue_label = "%-3s%4d%2s" % ( + proton.name, proton.res_num, proton.chain_id) i += 1 debug('added', new_h, 'to', atom) From d965d95fba86ed684350d37bb2f97b266afab71e Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:26:18 -0700 Subject: [PATCH 51/65] Clean up line breaks in run.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/propka/run.py b/propka/run.py index b633326..deeea27 100644 --- a/propka/run.py +++ b/propka/run.py @@ -27,7 +27,8 @@ def single(pdbfile, optargs=None): .. rubric:: Example :: - single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v", "--pH=7.2"]) + single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v", + "--pH=7.2"]) """ optargs = optargs if optargs is not None else [] options = loadOptions(*optargs) From c2f362a10178a88f4f253c7b7f52babf73bb09ad Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:30:58 -0700 Subject: [PATCH 52/65] Clean up line endings for vector_algebra.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/vector_algebra.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/propka/vector_algebra.py b/propka/vector_algebra.py index 8d874b5..cfa48a9 100644 --- a/propka/vector_algebra.py +++ b/propka/vector_algebra.py @@ -48,9 +48,12 @@ class Vector: return self.x * other.x + self.y * other.y + self.z * other.z elif isinstance(other, Matrix4x4): return Vector( - xi=other.a11*self.x + other.a12*self.y + other.a13*self.z + other.a14*1.0, - yi=other.a21*self.x + other.a22*self.y + other.a23*self.z + other.a24*1.0, - zi=other.a31*self.x + other.a32*self.y + other.a33*self.z + other.a34*1.0 + xi=other.a11*self.x + other.a12*self.y + other.a13*self.z + + other.a14*1.0, + yi=other.a21*self.x + other.a22*self.y + other.a23*self.z + + other.a24*1.0, + zi=other.a31*self.x + other.a32*self.y + other.a33*self.z + + other.a34*1.0 ) elif type(other) in [int, float]: return Vector(self.x * other, self.y * other, self.z * other) @@ -405,8 +408,7 @@ def rotate_multi_vector_around_an_axis(theta, axis, vec): raise 'Incompatible keys in rotate MultiVector' res = MultiVector() for i, key in enumerate(vec.keys): - res.vectors.append(rotate_vector_around_an_axis(theta, - axis.vectors[i], - vec.vectors[i])) + res.vectors.append(rotate_vector_around_an_axis( + theta, axis.vectors[i], vec.vectors[i])) res.keys.append(key) return res From 2bb6cf6293218b0a7f377996227e1111805493af Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:33:34 -0700 Subject: [PATCH 53/65] Clean up line breaks in version.py See https://github.com/jensengroup/propka-3.1/issues/43 --- propka/version.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/propka/version.py b/propka/version.py index b994cbc..8ce368d 100644 --- a/propka/version.py +++ b/propka/version.py @@ -40,31 +40,38 @@ class Version: def calculate_pair_weight(self, num_volume1, num_volume2): """Calculate pair weight using assigned model.""" - return self.weight_pair_method(self.parameters, num_volume1, num_volume2) + return self.weight_pair_method( + self.parameters, num_volume1, num_volume2) def hydrogen_bond_interaction(self, group1, group2): """Calculate H-bond energy using assigned model.""" return self.hydrogen_bond_interaction_model(group1, group2, self) - def calculate_side_chain_energy(self, distance, dpka_max, cutoff, _, f_angle): + def calculate_side_chain_energy(self, distance, dpka_max, cutoff, _, + f_angle): """Calculate sidechain energy using assigned model.""" - return self.sidechain_interaction_model(distance, dpka_max, cutoff, f_angle) + return self.sidechain_interaction_model( + distance, dpka_max, cutoff, f_angle) def electrostatic_interaction(self, group1, group2, distance): """Calculate electrostatic energy using assigned model.""" - return self.electrostatic_interaction_model(group1, group2, distance, self) + return self.electrostatic_interaction_model( + group1, group2, distance, self) def calculate_coulomb_energy(self, distance, weight): """Calculate Coulomb energy using assigned model.""" - return self.coulomb_interaction_model(distance, weight, self.parameters) + return self.coulomb_interaction_model( + distance, weight, self.parameters) def check_coulomb_pair(self, group1, group2, distance): """Check Coulomb pair using assigned model.""" - return self.check_coulomb_pair_method(self.parameters, group1, group2, distance) + return self.check_coulomb_pair_method( + self.parameters, group1, group2, distance) def calculate_backbone_reorganization(self, conformation): """Calculate backbone reorganization using assigned model.""" - return self.backbone_reorganisation_method(self.parameters, conformation) + return self.backbone_reorganisation_method( + self.parameters, conformation) def check_exceptions(self, group1, group2): """Calculate exceptions using assigned model.""" @@ -72,7 +79,8 @@ class Version: def setup_bonding_and_protonation(self, molecular_container): """Setup bonding and protonation using assigned model.""" - return self.molecular_preparation_method(self.parameters, molecular_container) + return self.molecular_preparation_method( + self.parameters, molecular_container) def setup_bonding(self, molecular_container): """Setup bonding using assigned model.""" @@ -242,8 +250,9 @@ class ElementBasedLigandInteractions(VersionA): res = self.parameters.hydrogen_bonds.get_value( elements[0], elements[1]) if not res: - info('Could not determine backbone interaction parameters for:', - backbone_atom, atom) + info( + 'Could not determine backbone interaction parameters for:', + backbone_atom, atom) return None return None From 36bea44a4dffc46d19011542f7fbb9fbe2659f28 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Mon, 25 May 2020 20:35:54 -0700 Subject: [PATCH 54/65] Clean up line breaks in tests. See https://github.com/jensengroup/propka-3.1/issues/43 --- tests/test_basic_regression.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/test_basic_regression.py b/tests/test_basic_regression.py index 60a72ba..8a4cbf0 100644 --- a/tests/test_basic_regression.py +++ b/tests/test_basic_regression.py @@ -22,7 +22,8 @@ MAX_ERR_DECIMALS = 2 TEST_DIR = Path("tests") # Location for test PDBs PDB_DIR = Path("pdb") -# Location for results for comparing output (allow running from tests/ and ../tests/) +# Location for results for comparing output (allow running from tests/ and +# ../tests/) RESULTS_DIR = Path("tests/results") if not RESULTS_DIR.is_dir(): _LOGGER.warning("Switching to sub-directory") @@ -47,7 +48,8 @@ def get_test_dirs(): if test_path.is_dir(): path_dict[key] = test_path else: - errstr = "Can't find %s test files in %s" % (key, [TEST_DIR / path, path]) + errstr = ("Can't find %s test files in %s" + % (key, [TEST_DIR / path, path])) raise FileNotFoundError(errstr) return path_dict @@ -63,11 +65,12 @@ def run_propka(options, pdb_path, tmp_path): options += [str(pdb_path)] args = propka.lib.loadOptions(options) try: - _LOGGER.warning("Working in tmpdir %s because of PROPKA file output; need to fix this.", - tmp_path) + _LOGGER.warning("Working in tmpdir %s because of PROPKA file output; " + "need to fix this.", tmp_path) cwd = Path.cwd() os.chdir(tmp_path) - molecule = propka.molecular_container.Molecular_container(str(pdb_path), args) + molecule = propka.molecular_container.Molecular_container( + str(pdb_path), args) molecule.calculate_pka() molecule.write_pka() finally: @@ -103,9 +106,11 @@ def compare_output(pdb, tmp_path, ref_path): match = re.search(r'([0-9]+\.[0-9]+)', line) value = float(match.group(0)) test_data.append(value) - errstr = "Error exceeds maximum allowed value (%d decimal places)" % MAX_ERR_DECIMALS - assert_almost_equal(test_data, ref_data, decimal=MAX_ERR_DECIMALS, - err_msg=errstr, verbose=True) + errstr = ("Error exceeds maximum allowed value (%d decimal places)" + % MAX_ERR_DECIMALS) + assert_almost_equal( + test_data, ref_data, decimal=MAX_ERR_DECIMALS, err_msg=errstr, + verbose=True) @pytest.mark.parametrize("pdb, options", [ From bcccd89ca4e9640a28a38bc4b8728203a9a353e0 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Tue, 26 May 2020 16:17:50 -0700 Subject: [PATCH 55/65] Add str.format() to atom.py. Addresses https://github.com/jensengroup/propka-3.1/issues/44 --- propka/atom.py | 71 ++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index b66587c..2e532fb 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -5,6 +5,24 @@ import propka.group from . import hybrid36 +# Format strings that get used in multiple places (or are very complex) +PKA_FMT = "{:6.2f}" +INPUT_LINE_FMT = ( + "{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}" + "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{group:>6s}{pka:>6s} ") +PDB_LINE_FMT1 = ( + "{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}" + "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{r.occ:>6s}{r.beta:>6s}") +MOL2_LINE_FMT = ( + "{id:<4d} {atom_label:4s} " + "{r.x:>10.4f} {r.y:>10.4f} {r.z:>10.4f} " + "{r.sybyl_type:>6s} {r.res_num:>6d} {r.res_name:>10s} 0.0000") +PDB_LINE_FMT2 = ( + "ATOM {numb:>6d} {atom_label} {res_name}{chain_id:>2s}{res_num:>4d}" + "{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}" +) + + class Atom(object): """Atom class - contains all atom information found in the PDB file""" @@ -48,8 +66,8 @@ class Atom(object): self.num_pi_elec_conj_2_3_bonds = 0 self.groups_extracted = 0 self.set_properties(line) - self.residue_label = "%-3s%4d%2s" % ( - self.name, self.res_num, self.chain_id) + fmt = "{r.name:3s}{r.res_num:>4d}{r.chain_id:>2s}" + self.residue_label = fmt.format(r=self) # ligand atom types self.sybyl_type = '' @@ -90,6 +108,7 @@ class Atom(object): self.chain_id = '_' self.type = line[:6].strip().lower() + # TODO - define nucleic acid residue names elsewhere if self.res_name in ['DA ', 'DC ', 'DG ', 'DT ']: self.type = 'hetatm' @@ -243,13 +262,12 @@ class Atom(object): group = 'C-' ## circumventing C-/COO parameter unification if self.group.titratable: - model_pka = '%6.2f'%self.group.model_pka - str_ = "%-6s%5d %s " % ( - self.type.upper(), self.numb, - propka.lib.make_tidy_atom_label(self.name, self.element)) - str_ += "%s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % ( - self.res_name, self.chain_id, self.res_num, self.x, self.y, - self.z, group, model_pka) + model_pka = PKA_FMT.format(self.group.model_pka) + str_ = INPUT_LINE_FMT.format( + type=self.type.upper(), r=self, + atom_label=propka.lib.make_tidy_atom_label(self.name, self.element), + group=group, pka=model_pka) + str_ += "\n" return str_ def make_conect_line(self): @@ -321,12 +339,10 @@ class Atom(object): Returns: String with PDB line. """ - str_ = "%-6s%5d " % (self.type.upper(), self.numb) - str_ += "%s %s" % (propka.lib.make_tidy_atom_label(self.name, self.element), - self.res_name) - str_ += "%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s\n" % (self.chain_id, self.res_num, - self.x, self.y, self.z, - self.occ, self.beta) + str_ = PDB_LINE_FMT1.format( + type=self.type.upper(), r=self, + atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) + str_ += "\n" return str_ def make_mol2_line(self, id_): @@ -339,11 +355,10 @@ class Atom(object): Returns: String with MOL2 line. """ - str_ = "%-4d %-4s " % ( - id_, propka.lib.make_tidy_atom_label(self.name, self.element)) - str_ += "%10.4f %10.4f %10.4f " % (self.x, self.y, self.z) - str_ += "%6s %6d %10s %10.4f\n" % ( - self.sybyl_type.replace('-', ''), self.res_num, self.res_name, 0.0) + str_ = MOL2_LINE_FMT.format( + id=id_, r=self, + atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) + str_ += "\n" return str_ def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None, @@ -377,17 +392,11 @@ class Atom(object): occ = self.occ if beta is None: beta = self.beta - str_ = "ATOM " - str_ += "%6d" % (numb) - str_ += " %s" % (propka.lib.make_tidy_atom_label(name, self.element)) - str_ += " %s" % (res_name) - str_ += "%2s" % (chain_id) - str_ += "%4d" % (res_num) - str_ += "%12.3lf" % (x) - str_ += "%8.3lf" % (y) - str_ += "%8.3lf" % (z) - str_ += "%6.2lf" % (occ) - str_ += "%6.2lf" % (beta) + str_ = PDB_LINE_FMT2.format( + numb=numb, res_name=res_name, chain_id=chain_id, res_num=res_num, + x=x, y=y, z=z, occ=occ, beta=beta, + atom_label=propka.lib.make_tidy_atom_label(name, self.element) + ) str_ += '\n' return str_ From 2305615fbef673dc3a9accf5b3e44e8718969f98 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Wed, 27 May 2020 08:01:30 -0700 Subject: [PATCH 56/65] Add line breaks to format strings. Addresses https://github.com/jensengroup/propka-3.1/issues/44#issuecomment-634233970 --- propka/atom.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index 2e532fb..ca445f8 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -9,17 +9,18 @@ from . import hybrid36 PKA_FMT = "{:6.2f}" INPUT_LINE_FMT = ( "{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}" - "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{group:>6s}{pka:>6s} ") + "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{group:>6s}{pka:>6s} \n") PDB_LINE_FMT1 = ( "{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}" - "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{r.occ:>6s}{r.beta:>6s}") + "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{r.occ:>6s}" + "{r.beta:>6s}\n") MOL2_LINE_FMT = ( "{id:<4d} {atom_label:4s} " "{r.x:>10.4f} {r.y:>10.4f} {r.z:>10.4f} " - "{r.sybyl_type:>6s} {r.res_num:>6d} {r.res_name:>10s} 0.0000") + "{r.sybyl_type:>6s} {r.res_num:>6d} {r.res_name:>10s} 0.0000\n") PDB_LINE_FMT2 = ( "ATOM {numb:>6d} {atom_label} {res_name}{chain_id:>2s}{res_num:>4d}" - "{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}" + "{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}\n" ) @@ -267,7 +268,6 @@ class Atom(object): type=self.type.upper(), r=self, atom_label=propka.lib.make_tidy_atom_label(self.name, self.element), group=group, pka=model_pka) - str_ += "\n" return str_ def make_conect_line(self): @@ -342,7 +342,6 @@ class Atom(object): str_ = PDB_LINE_FMT1.format( type=self.type.upper(), r=self, atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) - str_ += "\n" return str_ def make_mol2_line(self, id_): @@ -358,11 +357,11 @@ class Atom(object): str_ = MOL2_LINE_FMT.format( id=id_, r=self, atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) - str_ += "\n" return str_ def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None, - res_num=None, x=None, y=None, z=None, occ=None, beta=None): + res_num=None, x=None, y=None, z=None, occ=None, + beta=None): """Create a PDB line. TODO - this could/should be a @property method/attribute @@ -397,7 +396,6 @@ class Atom(object): x=x, y=y, z=z, occ=occ, beta=beta, atom_label=propka.lib.make_tidy_atom_label(name, self.element) ) - str_ += '\n' return str_ def get_tidy_label(self): From 557ab19312fbf688a09738d4362f542287fce48c Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Wed, 27 May 2020 08:19:36 -0700 Subject: [PATCH 57/65] Replace complex old-style format with new format strings. --- propka/atom.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index ca445f8..3fbaa86 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -20,8 +20,10 @@ MOL2_LINE_FMT = ( "{r.sybyl_type:>6s} {r.res_num:>6d} {r.res_name:>10s} 0.0000\n") PDB_LINE_FMT2 = ( "ATOM {numb:>6d} {atom_label} {res_name}{chain_id:>2s}{res_num:>4d}" - "{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}\n" -) + "{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}\n") +STR_FMT = ( + "{r.numb:>5d}-{r.name:>4s} {r.res_num:>5d}-{r.res_name:>3s} " + "({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}") class Atom(object): @@ -152,9 +154,9 @@ class Atom(object): array of bonded atoms. """ res = [] - for ba in self.bonded_atoms: - if ba.element == element: - res.append(ba) + for bond_atom in self.bonded_atoms: + if bond_atom.element == element: + res.append(bond_atom) return res def get_bonded_heavy_atoms(self): @@ -284,7 +286,7 @@ class Atom(object): bonded.sort() for b in bonded: - res += '%5d'%b + res += '%5d' % b res += '\n' return res @@ -409,9 +411,7 @@ class Atom(object): def __str__(self): """Return an undefined-format string version of this atom.""" - return '%5d-%4s %5d-%3s (%1s) [%8.3f %8.3f %8.3f] %s' % ( - self.numb, self.name, self.res_num, self.res_name, self.chain_id, - self.x, self.y, self.z, self.element) + return STR_FMT.format(r=self) def set_residue(self, residue): """ Makes a reference to the parent residue From 6e7c188dbdea73019a7ad6247a1cb7606762cecd Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Wed, 27 May 2020 16:26:52 -0700 Subject: [PATCH 58/65] Update string formatting to modern version. Focuses only on complex formatting. --- propka/calculations.py | 12 ++--- propka/conformation_container.py | 8 ++-- propka/coupled_groups.py | 78 ++++++++++++++++---------------- propka/group.py | 26 ++++++----- propka/ligand_pka_values.py | 10 ++-- propka/output.py | 5 +- propka/parameters.py | 28 +++++++----- 7 files changed, 90 insertions(+), 77 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index b507764..c7c84af 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -242,8 +242,8 @@ def add_trp_hydrogen(residue): elif atom.name == "CE2": ce_atom = atom if (cd_atom is None) or (ne_atom is None) or (ce_atom is None): - errstr = "Unable to find all atoms for %s %s" % (residue[0].res_name, - residue[0].res_num) + errstr = "Unable to find all atoms for %s %s" % ( + residue[0].res_name, residue[0].res_num) raise ValueError(errstr) he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) he_atom.name = "HNE" @@ -269,8 +269,8 @@ def add_amd_hydrogen(residue): or (atom.res_name == "ASN" and atom.name == "ND2")): n_atom = atom if (c_atom is None) or (o_atom is None) or (n_atom is None): - errstr = "Unable to find all atoms for %s %s" % (residue[0].res_name, - residue[0].res_num) + errstr = "Unable to find all atoms for %s %s" % ( + residue[0].res_name, residue[0].res_num) raise ValueError(errstr) h1_atom = protonate_direction(n_atom, o_atom, c_atom) h1_atom.name = "HN1" @@ -604,8 +604,8 @@ def hydrogen_bond_interaction(group1, group2, version): atoms2 = group2.get_interaction_atoms(group1) [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2) if None in [closest_atom1, closest_atom2]: - warning('Side chain interaction failed for %s and %s' % (group1.label, - group2.label)) + warning('Side chain interaction failed for %s and %s' % ( + group1.label, group2.label)) return None # get the parameters [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, diff --git a/propka/conformation_container.py b/propka/conformation_container.py index e81b8ac..f86b739 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -575,9 +575,11 @@ class ConformationContainer: def __str__(self): """String that lists statistics of atoms and groups.""" - str_ = ( - 'Conformation container %s with %d atoms and %d groups' - % (self.name, len(self), len(self.groups))) + fmt = ( + "Conformation container {name} with {natoms:d} atoms and " + "{ngroups:d} groups") + str_ = fmt.format( + name=self.name, natoms=len(self), ngroups=len(self.groups)) return str_ def __len__(self): diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index 7220304..942fed8 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -146,31 +146,27 @@ class NonCovalentlyCoupledGroups: """ self.parameters = conformation.parameters if verbose: - info('') - info(' Warning: When using the -d option, pKa values based on ' - '\'swapped\' interactions') - info(' will be writting to the output .pka file') - info('') - info('-' * 103) - info(' Detecting non-covalently coupled residues') - info('-' * 103) - info(' Maximum pKa difference: %4.2f pKa units' - % self.parameters.max_intrinsic_pka_diff) - info(' Minimum interaction energy: %4.2f pKa units' - % self.parameters.min_interaction_energy) - info(' Maximum free energy diff.: %4.2f pKa units' - % self.parameters.max_free_energy_diff) - info(' Minimum swap pKa shift: %4.2f pKa units' - % self.parameters.min_swap_pka_shift) - info(' pH: %6s ' - % str(self.parameters.pH)) - info(' Reference: %s' - % self.parameters.reference) - info(' Min pKa: %4.2f' - % self.parameters.min_pka) - info(' Max pKa: %4.2f' - % self.parameters.max_pka) - info('') + info_fmt = ( + '\n' + ' Warning: When using the -d option, pKa values based on \n' + '\'swapped\' interactions\n' + ' will be writting to the output .pka file\n' + '\n' + '{sep}\n' + '\n' + ' Detecting non-covalently coupled residues\n' + '{sep}\n' + ' Maximum pKa difference: {c.max_intrinsic_pka_diff:>4.2f} pKa units\n' + ' Minimum interaction energy: {c.min_interaction_energy:>4.2f} pKa units\n' + ' Maximum free energy diff.: {c.max_free_energy_diff:>4.2f} pKa units\n' + ' Minimum swap pKa shift: {c.min_swap_pka_shift:>4.2f} pKa units\n' + ' pH: {c.pH:>6} \n' + ' Reference: {c.reference}\n' + ' Min pKa: {c.min_pka:>4.2f}\n' + ' Max pKa: {c.max_pka:>4.2f}\n' + '\n') + sep = "-" * 103 + info(info_fmt.format(sep=sep, c=self)) # find coupled residues titratable_groups = conformation.get_titratable_groups() if not conformation.non_covalently_coupled_groups: @@ -364,20 +360,24 @@ class NonCovalentlyCoupledGroups: formatted string with information. """ str_ = ( - """ %s and %s coupled (prot.state): %5.2f - Energy levels: %6.2f, %6.2f (difference: %6.2f) at pH %6.2f - Interaction energy: %6.2f - Intrinsic pka's: %6.2f, %6.2f (difference: %6.2f) - Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)""" - % ( - group1.label, group2.label, data['coupling_factor'], - data['default_energy'], data['swapped_energy'], - data['default_energy'] - data['swapped_energy'], - data['pH'], data['interaction_energy'], - group1.intrinsic_pka, group2.intrinsic_pka, - group1.intrinsic_pka-group2.intrinsic_pka, - data['swapped_pka1'], data['swapped_pka2'], - data['pka_shift1'], data['pka_shift2'])) + " {label1} and {label2} coupled (prot.state): {coupl_fact:>5.2f}\n" + " Energy levels: {def_energy:>6.2f}, {swap_energy:>6.2f} " + "(difference: {diff_energy:>6.2f}) at pH {ph:>6.2f}\n" + " Interaction energy: {int_energy:>6.2f}\n" + " Intrinsic pka's: {pka1:>6.2f}, {pka2:>6.2f} " + "(difference: {diff_pka:>6.2f})\n" + " Swapped pKa's: {swap1:>6.2f}, {swap2:>6.2f} " + "(difference: {shift1:>6.2f}, {shift2:>6.2f})" + ).format( + label1=group1.label, label2=group2.label, + coupl_fact=data['coupling_factor'], def_energy=data['default_energy'], + swap_energy=data['swapped_energy'], + diff_energy=data['default_energy']-data['swapped_energy'], ph=data['pH'], + int_energy=data['interaction_energy'], pka1=group1.intrinsic_pka, + pka2=group2.intrinsic_pka, + diff_pka=group1.intrinsic_pka-group2.intrinsic_pka, + swap1=data['swapped_pka1'], swap2=data['swapped_pka2'], + shift1=data['pka_shift1'], shift2=data['pka_shift2']) return str_ diff --git a/propka/group.py b/propka/group.py index 24cb852..9b5c445 100644 --- a/propka/group.py +++ b/propka/group.py @@ -76,15 +76,18 @@ class Group: if self.atom.terminal: self.residue_type = self.atom.terminal if self.atom.type == 'atom': - self.label = '%-3s%4d%2s' % (self.residue_type, atom.res_num, - atom.chain_id) + fmt = "{g.residue_type:<3s}{a.res_num:>4d}{a.chain_id:>2s}" + self.label = fmt.format(g=self, a=atom) elif self.atom.res_name in ['DA ', 'DC ', 'DG ', 'DT ']: - self.label = '%1s%1s%1s%4d%2s' % ( - self.residue_type[1], atom.element, - atom.name.replace('\'', '')[-1], atom.res_num, atom.chain_id) + fmt = "{type:1s}{elem:1s}{name:1s}{res_num:>4d}{chain:>2s}" + self.label = fmt.format( + type=self.residue_type[1], elem=atom.element, + name=atom.name.replace('\'', '')[-1], res_num=atom.res_num, + chain=atom.chain_id) else: - self.label = '%-3s%4s%2s' % ( - self.residue_type, atom.name, atom.chain_id) + fmt = "{type:<3s}{name:>4s}{chain:>2s}" + self.label = fmt.format( + type=self.residue_type, name=atom.name, chain=atom.chain_id) # container for squared distances self.squared_distances = {} @@ -545,11 +548,10 @@ class Group: penalty = ( ' NB: Discarded due to coupling with %s' % self.coupled_titrating_group.label) - str_ = ( - " %9s %8.2lf %10.2lf %18s %s\n" - % (self.label, self.pka_value, self.model_pka, ligand_type, - penalty)) - return str_ + fmt = ( + " {g.label:>9s} {g.pka_value:8.2f} {g.model_pka:10.2f} " + "{type:>18s} {penalty:s}\n") + return fmt.format(g=self, type=ligand_type, penalty=penalty) def __str__(self): return 'Group (%s) for %s' % (self.type, self.atom) diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index ae38c3f..43e9dac 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -113,7 +113,7 @@ class LigandPkaValues: # do one molecule at the time so we don't confuse marvin molecules = propka.lib.split_atoms_into_molecules(atoms) for i, molecule in enumerate(molecules): - filename = '%s_%d.mol2'%(name, i+1) + filename = '%s_%d.mol2' % (name, i+1) self.get_marvin_pkas_for_molecule( molecule, filename=filename, reuse=reuse, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) @@ -141,8 +141,12 @@ class LigandPkaValues: warning(errstr) propka.pdb.write_mol2_for_atoms(atoms, filename) # Marvin calculate pKa values - options = ('pka -a %d -b %d --min %f --max %f -d large' - % (num_pkas, num_pkas, min_ph, max_ph)) + fmt = ( + 'pka -a {num1} -b {num2} --min {min_ph} ' + '--max {max_ph} -d large') + options = ( + fmt.format( + num1=num_pkas, num2=num_pkas, min_ph=min_ph, max_ph=max_ph)) (output, errors) = subprocess.Popen( [self.cxcalc, filename]+options.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() diff --git a/propka/output.py b/propka/output.py index be0e353..d2ed804 100644 --- a/propka/output.py +++ b/propka/output.py @@ -270,9 +270,10 @@ def get_charge_profile_section(protein, conformation='AVR', _=None): if profile is None: str_ += "Could not determine charge profile\n" else: - str_ += "%6s%10s%8s\n" % ("pH", "unfolded", "folded") + str_ += ' pH unfolded folded\n' for (ph, q_mod, q_pro) in profile: - str_ += "%6.2lf%10.2lf%8.2lf\n" % (ph, q_mod, q_pro) + str_ += "{ph:6.2f}{qm:10.2f}{qp:8.2f}\n".format( + ph=ph, qm=q_mod, qp=q_pro) pi_pro, pi_mod = protein.get_pi(conformation=conformation) if pi_pro is None or pi_mod is None: str_ += "Could not determine the pI\n\n" diff --git a/propka/parameters.py b/propka/parameters.py index 9210081..4bdff07 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -225,14 +225,17 @@ class Parameters: 'NAM': ['AMD'], 'N1': [], 'O2': []} for group1 in agroups: for group2 in lgroups: - interaction = '%3s %3s %1s %4s %4s' % ( - group1, group2, self.interaction_matrix[group1][group2], - self.sidechain_cutoffs.get_value(group1, group2)[0], - self.sidechain_cutoffs.get_value(group1, group2)[1]) + fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}" + interaction = fmt.format( + grp1=group1, grp2=group2, + mat=self.interaction_matrix[group1][group2], + val1=self.sidechain_cutoffs.get_value(group1, group2)[0], + val2=self.sidechain_cutoffs.get_value(group1, group2)[1]) map_interaction = '' if group2 in map_: for val in map_[group2]: - map_interaction += '|%3s %3s %1s %4s %4s' % ( + fmt = "|{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}" + map_interaction += fmt.format( group1, val, self.interaction_matrix[group1][val], self.sidechain_cutoffs.get_value(group1, val)[0], self.sidechain_cutoffs.get_value(group1, val)[1]) @@ -334,13 +337,14 @@ Group1 & Group2 & Interaction & c1 &c2 \\\\ if (self.sidechain_cutoffs.get_value(group1, group2) == self.sidechain_cutoffs.default): continue - str_ += ('%3s & %3s & %1s & %4s & %4s\\\\ \n' - % (group1, group2, - self.interaction_matrix[group1][group2], - self.sidechain_cutoffs.get_value(group1, - group2)[0], - self.sidechain_cutoffs.get_value(group1, - group2)[1])) + fmt = ( + "{grp1:>3s} & {grp2:>3s} & {mat:1s} & {val1:4} & " + "{val2:4}\\\\ \n") + str_ += fmt.format( + group1, group2, + self.interaction_matrix[group1][group2], + self.sidechain_cutoffs.get_value(group1, group2)[0], + self.sidechain_cutoffs.get_value(group1, group2)[1]) if group1 == group2: break str_ += ' \\end{longtable}\n' From 3266521d31395f43e306d104b08301b1f9113fe6 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Wed, 27 May 2020 20:55:44 -0700 Subject: [PATCH 59/65] Replace old formatting with new str.format() --- propka/atom.py | 16 +++++++----- propka/bonds.py | 6 ++--- propka/calculations.py | 21 ++++++++------- propka/conformation_container.py | 4 +-- propka/coupled_groups.py | 21 ++++++++------- propka/determinant.py | 2 +- propka/group.py | 45 ++++++++++++++++++-------------- 7 files changed, 62 insertions(+), 53 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index 3fbaa86..40e15b2 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -104,7 +104,7 @@ class Atom(object): self.y = float(line[38:46].strip()) self.z = float(line[46:54].strip()) self.res_num = int(line[22:26].strip()) - self.res_name = "%-3s" % (line[17:20].strip()) + self.res_name = "{0:<3s}".format(line[17:20].strip()) self.chain_id = line[21] # Set chain id to "_" if it is just white space. if not self.chain_id.strip(): @@ -124,7 +124,7 @@ class Atom(object): if len(self.name) == 4: self.element = self.element[0] if len(self.element) == 2: - self.element = '%1s%1s' % ( + self.element = '{0:1s}{1:1s}'.format( self.element[0], self.element[1].lower()) def set_group_type(self, type_): @@ -278,15 +278,15 @@ class Atom(object): Returns: String with PDB line. """ - res = 'CONECT%5d' % self.numb + res = 'CONECT{0:5d}'.format(self.numb) bonded = [] for atom in self.bonded_atoms: bonded.append(atom.numb) bonded.sort() - for b in bonded: - res += '%5d' % b + for bond in bonded: + res += '{0:5d}'.format(bond) res += '\n' return res @@ -316,12 +316,14 @@ class Atom(object): self.occ = self.occ.replace('LG', 'non_titratable_ligand') # try to initialise the group try: - group_attr = "%s_group" % self.occ + group_attr = "{0:s}_group".format(self.occ) group_attr = getattr(propka.group, group_attr) self.group = group_attr(self) except: # TODO - be more specific with expection handling here - str_ = '%s in input_file is not recognized as a group' % self.occ + str_ = ( + '{0:s} in input_file is not recognized as a group'.format( + self.occ)) raise Exception(str_) # set the model pKa value if self.beta != '-': diff --git a/propka/bonds.py b/propka/bonds.py index 0144f78..c6656df 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -191,7 +191,7 @@ class BondMaker: atoms: list of atoms to check for bonds """ for atom1 in atoms: - key = '%s-%s' % (atom1.res_name, atom1.name) + key = '{0:s}-{1:s}'.format(atom1.res_name, atom1.name) if key in list(self.num_pi_elec_bonds_sidechains.keys()): atom1.num_pi_elec_2_3_bonds = ( self.num_pi_elec_bonds_sidechains[key]) @@ -235,7 +235,7 @@ class BondMaker: self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type]) # for protein if atom.type == 'atom': - key = '%s-%s' % (atom.res_name, atom.name) + key = '{0:s}-{1:s}'.format(atom.res_name, atom.name) if key in list(self.num_pi_elec_bonds_sidechains.keys()): atom.num_pi_elec_2_3_bonds = ( self.num_pi_elec_bonds_sidechains[key]) @@ -300,7 +300,7 @@ class BondMaker: sq_dist = propka.calculations.squared_distance(atom1, atom2) if sq_dist > self.max_sq_distance: return False - key = '%s-%s' % (atom1.element, atom2.element) + key = '{0:s}-{1:s}'.format(atom1.element, atom2.element) h_count = key.count('H') if sq_dist < self.h_dist_squared and h_count == 1: return True diff --git a/propka/calculations.py b/propka/calculations.py index c7c84af..3770983 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -242,9 +242,9 @@ def add_trp_hydrogen(residue): elif atom.name == "CE2": ce_atom = atom if (cd_atom is None) or (ne_atom is None) or (ce_atom is None): - errstr = "Unable to find all atoms for %s %s" % ( + str_ = "Unable to find all atoms for {0:s} {1:s}".format( residue[0].res_name, residue[0].res_num) - raise ValueError(errstr) + raise ValueError(str_) he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) he_atom.name = "HNE" @@ -269,9 +269,9 @@ def add_amd_hydrogen(residue): or (atom.res_name == "ASN" and atom.name == "ND2")): n_atom = atom if (c_atom is None) or (o_atom is None) or (n_atom is None): - errstr = "Unable to find all atoms for %s %s" % ( + str_ = "Unable to find all atoms for {0:s} {1:s}".format( residue[0].res_name, residue[0].res_num) - raise ValueError(errstr) + raise ValueError(str_) h1_atom = protonate_direction(n_atom, o_atom, c_atom) h1_atom.name = "HN1" h2_atom = protonate_average_direction(n_atom, c_atom, o_atom) @@ -396,10 +396,10 @@ def make_new_h(atom, x, y, z): new hydrogen atom """ new_h = propka.atom.Atom() - new_h.set_property(numb=None, name='H%s' % atom.name[1:], - res_name=atom.res_name, chain_id=atom.chain_id, - res_num=atom.res_num, x=x, y=y, z=z, occ=None, - beta=None) + new_h.set_property( + numb=None, name='H{0:s}'.format(atom.name[1:]), + res_name=atom.res_name, chain_id=atom.chain_id, + res_num=atom.res_num, x=x, y=y, z=z, occ=None, beta=None) new_h.element = 'H' new_h.bonded_atoms = [atom] new_h.charge = 0 @@ -604,8 +604,9 @@ def hydrogen_bond_interaction(group1, group2, version): atoms2 = group2.get_interaction_atoms(group1) [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2) if None in [closest_atom1, closest_atom2]: - warning('Side chain interaction failed for %s and %s' % ( - group1.label, group2.label)) + warning( + 'Side chain interaction failed for {0:s} and {1:s}'.format( + group1.label, group2.label)) return None # get the parameters [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, diff --git a/propka/conformation_container.py b/propka/conformation_container.py index f86b739..d44e30c 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -58,7 +58,7 @@ class ConformationContainer: # if a group is coupled and we are reading a .propka_input file, then # some more configuration might be needed map_ = make_interaction_map( - 'Covalent coupling map for %s' % self, + 'Covalent coupling map for {0:s}'.format(str(self)), self.get_covalently_coupled_groups(), lambda g1, g2: g1 in g2.covalently_coupled_groups) info(map_) @@ -100,7 +100,7 @@ class ConformationContainer: self.set_common_charge_centres() # print coupling map map_ = make_interaction_map( - 'Covalent coupling map for %s' % self, + 'Covalent coupling map for {0:s}'.format(str(self)), self.get_covalently_coupled_groups(), lambda g1, g2: g1 in g2.covalently_coupled_groups) info(map_) diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index 942fed8..aeed93e 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -193,7 +193,7 @@ class NonCovalentlyCoupledGroups: conformation: conformation to print """ map_ = make_interaction_map( - 'Non-covalent coupling map for %s' % conformation, + 'Non-covalent coupling map for {0:s}'.format(str(conformation)), conformation.get_non_covalently_coupled_groups(), lambda g1, g2: g1 in g2.non_covalently_coupled_groups) info(map_) @@ -209,7 +209,8 @@ class NonCovalentlyCoupledGroups: conformation: conformation to print system: system to print """ - info('System containing %d groups:' % len(system)) + info( + 'System containing {0:d} groups:'.format(len(system))) # make list of interactions within this system interactions = list(itertools.combinations(system, 2)) # print out coupling info for each interaction @@ -233,8 +234,8 @@ class NonCovalentlyCoupledGroups: # Tell the user what is swap in this combination swap_info += 'Swapping the following interactions:\n' for interaction in combination: - swap_info += ' %s %s\n' % (interaction[0].label, - interaction[1].label) + swap_info += ' {0:s} {1:s}\n'.format( + interaction[0].label, interaction[1].label) # swap... for interaction in combination: self.swap_interactions([interaction[0]], [interaction[1]]) @@ -275,9 +276,9 @@ class NonCovalentlyCoupledGroups: all_labels = [g.label for g in system] str_ = ' ' + '-' * 113 + '\n' for group in system: - str_ += self.tagged_format(' %-8s|' % tag, - group.get_determinant_string(), - all_labels) + str_ += self.tagged_format( + ' {0:<8s}|'.format(tag), group.get_determinant_string(), + all_labels) return str_ + '\n' def swap_interactions(self, groups1, groups2, include_side_chain_hbs=True): @@ -342,10 +343,10 @@ class NonCovalentlyCoupledGroups: Returns: tagged string """ - str_ = "%s %s" % (tag, str_) - str_ = str_.replace('\n', '\n%s ' % tag) + str_ = "{0:s} {1:s}".format(tag, str_) + str_ = str_.replace('\n', '\n{0:s} '.format(tag)) for label in labels: - str_ = str_.replace(label, '\033[31m%s\033[30m' % label) + str_ = str_.replace(label, '\033[31m{0:s}\033[30m'.format(label)) return str_ + '\n' @staticmethod diff --git a/propka/determinant.py b/propka/determinant.py index 2b29d08..5370798 100644 --- a/propka/determinant.py +++ b/propka/determinant.py @@ -34,4 +34,4 @@ class Determinant: self.value += value def __str__(self): - return '%s: %8.2f' % (self.label, self.value) + return '{0:s}: {1:8.2f}'.format(self.label, self.value) diff --git a/propka/group.py b/propka/group.py index 9b5c445..125b0dc 100644 --- a/propka/group.py +++ b/propka/group.py @@ -179,7 +179,7 @@ class Group: # first check if there are any coupled groups at all if len(self.covalently_coupled_groups) == 0: return '' - line = 'CCOUPL%5d' % self.atom.numb + line = 'CCOUPL{0:5d}'.format(self.atom.numb) # extract and sort numbers of coupled groups coupled = [] for group in self.covalently_coupled_groups: @@ -187,7 +187,7 @@ class Group: coupled.sort() # write 'em out for num in coupled: - line += '%5d' % num + line += '{0:5d}'.format(num) line += '\n' return line @@ -200,7 +200,7 @@ class Group: # first check if there are any coupled groups at all if len(self.non_covalently_coupled_groups) == 0: return '' - line = 'NCOUPL%5d' % self.atom.numb + line = 'NCOUPL{0:5d}'.format(self.atom.numb) # extract and sort numbers of coupled groups coupled = [] for group in self.non_covalently_coupled_groups: @@ -208,7 +208,7 @@ class Group: coupled.sort() # write 'em out for num in coupled: - line += '%5d' % num + line += '{0:5d}'.format(num) line += '\n' return line @@ -229,9 +229,10 @@ class Group: def __iadd__(self, other): if self.type != other.type: - errstr = ('Cannot add groups of different types (%s and %s)' - % (self.type, other.type)) - raise Exception(errstr) + str_ = ( + 'Cannot add groups of different types ' + '({0:s} and {1:s})'.format(self.type, other.type)) + raise Exception(str_) # add all values self.pka_value += other.pka_value self.num_volume += other.num_volume @@ -343,9 +344,9 @@ class Group: if not self.model_pka_set: self.model_pka = self.parameters.model_pkas[self.residue_type] # check if we should apply a custom model pka - key = ( - '%s-%s' - % (self.atom.res_name.strip(), self.atom.name.strip())) + key = '{0:s}-{1:s}'.format( + self.atom.res_name.strip(), + self.atom.name.strip()) if key in self.parameters.custom_model_pkas.keys(): self.model_pka = self.parameters.custom_model_pkas[key] self.model_pka_set = True @@ -388,10 +389,10 @@ class Group: ok = False if not ok: str_ = 'Missing atoms or failed protonation for ' - str_ += ('%s (%s) -- please check the structure' - % (self.label, self.type)) + str_ += '{0:s} ({1:s}) -- please check the structure'.format( + self.label, self.type) warning(str_) - warning('%s' % self) + warning('{0:s}'.format(str(self))) num_acid = sum( [EXPECTED_ATOMS_ACID_INTERACTIONS[self.type][e] for e in EXPECTED_ATOMS_ACID_INTERACTIONS[self.type].keys()]) @@ -399,15 +400,19 @@ class Group: [EXPECTED_ATOMS_BASE_INTERACTIONS[self.type][e] for e in EXPECTED_ATOMS_BASE_INTERACTIONS[self.type].keys()]) warning( - 'Expected %d interaction atoms for acids, found:' % num_acid) + 'Expected {0:d} interaction atoms for acids, found:'.format( + num_acid)) for i in range(len(self.interaction_atoms_for_acids)): warning( - ' %s' % self.interaction_atoms_for_acids[i]) + ' {0:s}'.format( + str(self.interaction_atoms_for_acids[i]))) warning( - 'Expected %d interaction atoms for bases, found:' % num_base) + 'Expected {0:d} interaction atoms for bases, found:'.format( + num_base)) for i in range(len(self.interaction_atoms_for_bases)): warning( - ' %s' % self.interaction_atoms_for_bases[i]) + ' {0:s}'.format( + str(self.interaction_atoms_for_bases[i]))) def get_interaction_atoms(self, interacting_group): """Get atoms involved in interaction with other group. @@ -461,14 +466,14 @@ class Group: number_of_coulomb) str_ = "" for line_number in range(number_of_lines): - str_ += "%s" % (self.label) + str_ += "{0:s}".format(self.label) if line_number == 0: - str_ += " %6.2lf" %(self.pka_value) + str_ += " {0:6.2f}".format(self.pka_value) if len(self.non_covalently_coupled_groups) > 0: str_ += '*' else: str_ += ' ' - str_ += " %4d%2s " % (int(100.0*self.buried), "%") + str_ += " {0:4d}{1:>2s} ".format(int(100.0*self.buried), "%") str_ += " %6.2lf %4d" % (self.energy_volume, self.num_volume) str_ += " %6.2lf %4d" % (self.energy_local, self.num_local) else: From ba67f5149dbd1e3032876d00213b117ea39abbc9 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Thu, 28 May 2020 16:10:48 -0700 Subject: [PATCH 60/65] Replace old-style printf with str.format --- propka/coupled_groups.py | 21 +++-- propka/group.py | 32 ++++--- propka/hybrid36.py | 8 +- propka/iterative.py | 15 +-- propka/lib.py | 15 +-- propka/ligand_pka_values.py | 20 ++-- propka/molecular_container.py | 19 ++-- propka/output.py | 93 ++++++++++--------- propka/parameters.py | 170 ++++++++++++++++++---------------- propka/pdb.py | 38 ++++---- propka/protonate.py | 2 +- 11 files changed, 231 insertions(+), 202 deletions(-) diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index aeed93e..bb47dfe 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -369,16 +369,17 @@ class NonCovalentlyCoupledGroups: "(difference: {diff_pka:>6.2f})\n" " Swapped pKa's: {swap1:>6.2f}, {swap2:>6.2f} " "(difference: {shift1:>6.2f}, {shift2:>6.2f})" - ).format( - label1=group1.label, label2=group2.label, - coupl_fact=data['coupling_factor'], def_energy=data['default_energy'], - swap_energy=data['swapped_energy'], - diff_energy=data['default_energy']-data['swapped_energy'], ph=data['pH'], - int_energy=data['interaction_energy'], pka1=group1.intrinsic_pka, - pka2=group2.intrinsic_pka, - diff_pka=group1.intrinsic_pka-group2.intrinsic_pka, - swap1=data['swapped_pka1'], swap2=data['swapped_pka2'], - shift1=data['pka_shift1'], shift2=data['pka_shift2']) + ).format( + label1=group1.label, label2=group2.label, + coupl_fact=data['coupling_factor'], + def_energy=data['default_energy'], + swap_energy=data['swapped_energy'], + diff_energy=data['default_energy']-data['swapped_energy'], + ph=data['pH'], int_energy=data['interaction_energy'], + pka1=group1.intrinsic_pka, pka2=group2.intrinsic_pka, + diff_pka=group1.intrinsic_pka-group2.intrinsic_pka, + swap1=data['swapped_pka1'], swap2=data['swapped_pka2'], + shift1=data['pka_shift1'], shift2=data['pka_shift2']) return str_ diff --git a/propka/group.py b/propka/group.py index 125b0dc..9251275 100644 --- a/propka/group.py +++ b/propka/group.py @@ -473,11 +473,13 @@ class Group: str_ += '*' else: str_ += ' ' - str_ += " {0:4d}{1:>2s} ".format(int(100.0*self.buried), "%") - str_ += " %6.2lf %4d" % (self.energy_volume, self.num_volume) - str_ += " %6.2lf %4d" % (self.energy_local, self.num_local) + str_ += " {0:4d}{1:>2s} ".format(int(100.0*self.buried), "%%") + str_ += " {0:6.2f} {1:4d}".format( + self.energy_volume, int(self.num_volume)) + str_ += " {0:6.2f} {1:4d}".format( + self.energy_local, int(self.num_local)) else: - str_ += "%40s" % (" ") + str_ += "{0:>40s}".format(" ") # add the determinants for type_ in ['sidechain', 'backbone', 'coulomb']: str_ += self.get_determinant_for_string(type_, line_number) @@ -496,11 +498,11 @@ class Group: string """ if number >= len(self.determinants[type_]): - empty_determinant = "%s%4d%2s" % ("XXX", 0, "X") - return "%8.2lf %s" % (0.0, empty_determinant) + return " 0.00 XXX 0 X" else: determinant = self.determinants[type_][number] - return "%8.2lf %s" % (determinant.value, determinant.label) + return "{0:8.2f} {1:s}".format( + determinant.value, determinant.label) def calculate_total_pka(self): """Calculate total pKa based on determinants associated with this @@ -551,15 +553,16 @@ class Group: penalty = '' if self.coupled_titrating_group: penalty = ( - ' NB: Discarded due to coupling with %s' - % self.coupled_titrating_group.label) + ' NB: Discarded due to coupling with {0:s}'.format( + self.coupled_titrating_group.label)) fmt = ( " {g.label:>9s} {g.pka_value:8.2f} {g.model_pka:10.2f} " "{type:>18s} {penalty:s}\n") return fmt.format(g=self, type=ligand_type, penalty=penalty) def __str__(self): - return 'Group (%s) for %s' % (self.type, self.atom) + str_ = 'Group ({0:s}) for {1:s}'.format(self.type, str(self.atom)) + return str_ def calculate_folding_energy(self, parameters, ph=None, reference=None): """Return the electrostatic energy of this residue at specified pH. @@ -1243,8 +1246,9 @@ def is_group(parameters, atom): elif parameters.ligand_typing == 'groups': ligand_group = is_ligand_group_by_groups(parameters, atom) else: - raise Exception('Unknown ligand typing method \'%s\'' - % parameters.ligand_typing) + raise Exception( + 'Unknown ligand typing method \'{0.s}\''.format( + parameters.ligand_typing)) if ligand_group: return ligand_group return None @@ -1276,9 +1280,9 @@ def is_protein_group(parameters, atom): if atom.count_bonded_elements('O') == 1: return BBCGroup(atom) ### Filters for side chains based on PDB protein atom names - key = '%s-%s' % (atom.res_name, atom.name) + key = '{0:s}-{1:s}'.format(atom.res_name, atom.name) if key in parameters.protein_group_mapping.keys(): - class_str = "%sGroup" % parameters.protein_group_mapping[key] + class_str = "{0:s}Group".format(parameters.protein_group_mapping[key]) group_class = globals()[class_str] return group_class(atom) return None diff --git a/propka/hybrid36.py b/propka/hybrid36.py index 8acf0c1..b1e934a 100644 --- a/propka/hybrid36.py +++ b/propka/hybrid36.py @@ -20,7 +20,7 @@ def decode(input_string): Returns: integer """ - value_error_message = "invalid literal for hybrid-36 conversion: '%s'" + value_error_message = "invalid literal for hybrid-36 conversion: '{0:s}'" original_input_string = input_string input_string = input_string.strip() @@ -33,7 +33,7 @@ def decode(input_string): sign = 1 if len(input_string) == 0: - raise ValueError(value_error_message % input_string) + raise ValueError(value_error_message.format(input_string)) # See http://cci.lbl.gov/hybrid_36/ for documentation on the format. @@ -49,13 +49,13 @@ def decode(input_string): reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars) _hybrid36_set = _HYBRID36_LOWER_SET else: - raise ValueError(value_error_message % original_input_string) + raise ValueError(value_error_message.format(original_input_string)) # Check the validity of the input string: ASCII characters should be # either all uppercase or all lowercase. for char in input_string[1:]: if char not in _hybrid36_set: - raise ValueError(value_error_message % original_input_string) + raise ValueError(value_error_message.format(original_input_string)) # Convert with the int function. return sign * (int(input_string, 36) + reference) diff --git a/propka/iterative.py b/propka/iterative.py index 5bafc8e..c3f999a 100644 --- a/propka/iterative.py +++ b/propka/iterative.py @@ -211,8 +211,9 @@ def add_determinants(iterative_interactions, version, _=None): iteratives.append(new_iterative) done_group.append(group) # Initialize iterative scheme - debug("\n --- pKa iterations (%d groups, %d interactions) ---" % - (len(iteratives), len(iterative_interactions))) + debug( + "\n --- pKa iterations ({0:d} groups, {1:d} interactions) ---".format( + len(iteratives), len(iterative_interactions))) converged = False iteration = 0 # set non-iterative pka values as first step @@ -262,18 +263,18 @@ def add_determinants(iterative_interactions, version, _=None): itres.pka_iter.append(itres.pka_new) if iteration == 10: - info("did not converge in %d iterations" % (iteration)) + info("did not converge in {0:d} iterations".format(iteration)) break # printing pKa iterations # formerly was conditioned on if options.verbosity >= 2 - now unnecessary - str_ = "%12s" % (" ") + str_ = ' ' for index in range(iteration+1): - str_ += "%8d" % (index) + str_ += "{0:>8d}".format(index) debug(str_) for itres in iteratives: - str_ = "%s " % (itres.label) + str_ = "{0:s} ".format(itres.label) for pka in itres.pka_iter: - str_ += "%8.2lf" % (pka) + str_ += "{0:>8.2f}".format(pka) if not itres.converged: str_ += " *" debug(str_) diff --git a/propka/lib.py b/propka/lib.py index ab54850..6e45972 100644 --- a/propka/lib.py +++ b/propka/lib.py @@ -29,7 +29,7 @@ def open_file_for_reading(input_file): try: file_ = open(input_file, 'rt') except: - raise IOError('Cannot find file %s' % input_file) + raise IOError('Cannot find file {0:s}'.format(input_file)) return file_ @@ -52,7 +52,7 @@ def open_file_for_writing(input_file): try: file_ = open(input_file, 'wt') except FileNotFoundError: - raise Exception('Could not open %s' % input_file) + raise Exception('Could not open {0:s}'.format(input_file)) return file_ @@ -228,7 +228,7 @@ def build_parser(parser=None): group.add_argument( "-p", "--parameters", dest="parameters", default=pkg_resources.resource_filename(__name__, "propka.cfg"), - help="set the parameter file [%(default)s]") + help="set the parameter file [{default:s}]") try: group.add_argument( "--log-level", @@ -306,7 +306,8 @@ def loadOptions(args): try: chain, resnum, inscode = parse_res_string(res_str) except ValueError: - _LOGGER.critical('Invalid residue string: "%s"', res_str) + _LOGGER.critical( + 'Invalid residue string: "{0:s}"'.format(res_str)) sys.exit(1) res_list.append((chain, resnum, inscode)) options.titrate_only = res_list @@ -332,9 +333,9 @@ def make_tidy_atom_label(name, element): label = name else: # if less than 4 characters long, insert white space as needed if len(element) == 1: - label = ' %-3s' % name + label = ' {0:<3s}'.format(name) else: # The element should occupy the two first chars - label = '%-4s' % name + label = '{0:<4s}'.format(name) return label @@ -365,7 +366,7 @@ def write_file(filename, lines): """ file_ = open_file_for_writing(filename) for line in lines: - file_.write("%s\n" % (line)) + file_.write("{0:s}\n".format(line)) file_.close() diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index 43e9dac..d7a5423 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -42,7 +42,7 @@ class LigandPkaValues: map(lambda dir: os.path.join(dir, program), path))] if len(locs) == 0: - str_ = "'Error: Could not find %s." % program + str_ = "'Error: Could not find {0:s}.".format(program) str_ += ' Please make sure that it is found in the path.' info(str_) sys.exit(-1) @@ -73,7 +73,7 @@ class LigandPkaValues: max_ph: maximum pH value """ for name in molecule.conformation_names: - filename = '%s_%s' % (molecule.name, name) + filename = '{0:s}_{1:s}'.format(molecule.name, name) self.get_marvin_pkas_for_conformation_container( molecule.conformations[name], name=filename, reuse=molecule.options.reuse_ligand_mol2_file, @@ -113,7 +113,7 @@ class LigandPkaValues: # do one molecule at the time so we don't confuse marvin molecules = propka.lib.split_atoms_into_molecules(atoms) for i, molecule in enumerate(molecules): - filename = '%s_%d.mol2' % (name, i+1) + filename = '{0:s}_{1:d}.mol2'.format(name, i+1) self.get_marvin_pkas_for_molecule( molecule, filename=filename, reuse=reuse, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) @@ -136,8 +136,10 @@ class LigandPkaValues: propka.pdb.write_mol2_for_atoms(atoms, filename) # check that we actually have a file to work with if not os.path.isfile(filename): - errstr = ("Didn't find a user-modified file '%s' - generating one" - % filename) + errstr = ( + "Didn't find a user-modified file '{0:s}' " + "- generating one".format( + filename)) warning(errstr) propka.pdb.write_mol2_for_atoms(atoms, filename) # Marvin calculate pKa values @@ -155,11 +157,11 @@ class LigandPkaValues: '*********************************************') info('* Warning: Marvin execution failed: ' ' *') - info('* %-100s *' % errors) + info('* {0:<100s} *'.format(errors)) info('* ' ' *') info('* Please edit the ligand mol2 file and re-run PropKa with ' - 'the -l option: %29s *' % filename) + 'the -l option: {0:>29s} *'.format(filename)) info('***********************************************************' '*********************************************') sys.exit(-1) @@ -169,7 +171,7 @@ class LigandPkaValues: for i, index in enumerate(indices): atoms[index].marvin_pka = pkas[i] atoms[index].charge = {'a': -1, 'b': 1}[types[i]] - info('%s model pKa: %.2f' % (atoms[index], pkas[i])) + info('{0:s} model pKa: {1:<.2f}'.format(atoms[index], pkas[i])) @staticmethod def extract_pkas(output): @@ -188,7 +190,7 @@ class LigandPkaValues: values = values.split('\t') # format values types = [ - tags[i][0] for i in range(1, len(tags)-1) + tags[i][0] for i in range(1, len(tags)-1) if len(values) > i and values[i] != ''] indices = [int(a)-1 for a in values[-1].split(',') if a != ''] values = [float(v.replace(',', '.')) for v in values[1:-1] if v != ''] diff --git a/propka/molecular_container.py b/propka/molecular_container.py index 0c97448..3d2409f 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -50,7 +50,8 @@ class Molecular_container: self.version = version_class(parameters) except AttributeError as err: print(err) - errstr = 'Error: Version %s does not exist' % parameters.version + errstr = 'Error: Version {0:s} does not exist'.format( + parameters.version) raise Exception(errstr) # read the input file if input_file_extension[0:4] == '.pdb': @@ -89,7 +90,7 @@ class Molecular_container: # do some additional set up self.additional_setup_when_reading_input_file() else: - info('Unrecognized input file:%s' % input_file) + info('Unrecognized input file:{0:s}'.format(input_file)) sys.exit(-1) def top_up_conformations(self): @@ -152,8 +153,10 @@ class Molecular_container: if group_to_add: avr_group += group_to_add else: - str_ = ('Group %s could not be found in conformation %s.' - % (group.atom.residue_label, name)) + str_ = ( + 'Group {0:s} could not be found in ' + 'conformation {0:s}.'.format( + group.atom.residue_label, name)) warning(str_) # ... and store the average value avr_group = avr_group / len(self.conformation_names) @@ -178,16 +181,16 @@ class Molecular_container: options: options object """ # write out the average conformation - filename = os.path.join('%s.pka' % (self.name)) + filename = os.path.join('{0:s}.pka'.format(self.name)) # if the display_coupled_residues option is true, write the results out # to an alternative pka file if self.options.display_coupled_residues: - filename = os.path.join('%s_alt_state.pka' % (self.name)) + filename = os.path.join('{0:s}_alt_state.pka'.format(self.name)) if (hasattr(self.version.parameters, 'output_file_tag') and len(self.version.parameters.output_file_tag) > 0): filename = os.path.join( - '%s_%s.pka' % (self.name, - self.version.parameters.output_file_tag)) + '{0:s}_{1:s}.pka'.format( + self.name, self.version.parameters.output_file_tag)) propka.output.write_pka( self, self.version.parameters, filename=filename, conformation='AVR', reference=reference) diff --git a/propka/output.py b/propka/output.py index d2ed804..f533316 100644 --- a/propka/output.py +++ b/propka/output.py @@ -5,9 +5,9 @@ from propka.lib import info def print_header(): """Print header section of output.""" - str_ = "%s\n" % get_propka_header() - str_ += "%s\n" % get_references_header() - str_ += "%s\n" % get_warning_header() + str_ = "{0:s}\n".format(get_propka_header()) + str_ += "{0:s}\n".format(get_references_header()) + str_ += "{0:s}\n".format(get_warning_header()) info(str_) @@ -25,10 +25,10 @@ def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False, if pdbfile is None: # opening file if not given if filename is None: - filename = "%s.pdb" % (protein.name) + filename = "{0:s}.pdb".format(protein.name) # TODO - this would be better as a context manager pdbfile = open(filename, 'w') - info("writing pdbfile %s" % (filename)) + info("writing pdbfile {0:s}".format(filename)) close_file = True else: # don't close the file, it was opened in a different place @@ -67,20 +67,20 @@ def write_pka(protein, parameters, filename=None, conformation='1A', # TODO - the code immediately overrides the verbose argument; why? verbose = True if filename is None: - filename = "%s.pka" % (protein.name) + filename = "{0:s}.pka".format(protein.name) # TODO - this would be much better with a context manager file_ = open(filename, 'w') if verbose: - info("Writing %s" % (filename)) + info("Writing {0:s}".format(filename)) # writing propka header - str_ = "%s\n" % get_propka_header() - str_ += "%s\n" % get_references_header() - str_ += "%s\n" % get_warning_header() + str_ = "{0:s}\n".format(get_propka_header()) + str_ += "{0:s}\n".format(get_references_header()) + str_ += "{0:s}\n".format(get_warning_header()) # writing pKa determinant section str_ += get_determinant_section(protein, conformation, parameters) # writing pKa summary section str_ += get_summary_section(protein, conformation, parameters) - str_ += "%s\n" % get_the_line() + str_ += "{0:s}\n".format(get_the_line()) # printing Folding Profile str_ += get_folding_profile_section( protein, conformation=conformation, reference=reference, @@ -109,17 +109,18 @@ def print_tm_profile(protein, reference="neutral", window=[0., 14., 1.], _: Boolean for verbosity options: options object """ - profile = protein.getTmProfile(reference=reference, grid=[0., 14., 0.1], - tms=tms, ref=ref, options=options) + profile = protein.getTmProfile( + reference=reference, grid=[0., 14., 0.1], tms=tms, ref=ref, + options=options) if profile is None: str_ = "Could not determine Tm-profile\n" else: - str_ = " suggested Tm-profile for %s\n" % (protein.name) + str_ = " suggested Tm-profile for {0:s}\n".format(protein.name) for (ph, tm_) in profile: if (ph >= window[0] and ph <= window[1] - and (ph%window[2] < 0.01 - or ph%window[2] > 0.99*window[2])): - str_ += "%6.2lf%10.2lf\n" % (ph, tm_) + and (ph % window[2] < 0.01 + or ph % window[2] > 0.99*window[2])): + str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, tm_) info(str_) @@ -160,7 +161,7 @@ def get_determinant_section(protein, conformation, parameters): string """ # getting the same order as in propka2.0 - str_ = "%s\n" % get_determinants_header() + str_ = "{0:s}\n".format(get_determinants_header()) # printing determinants for chain in protein.conformations[conformation].chains: for residue_type in parameters.write_out_order: @@ -169,8 +170,9 @@ def get_determinant_section(protein, conformation, parameters): if g.atom.chain_id == chain] for group in groups: if group.residue_type == residue_type: - str_ += "%s" % group.get_determinant_string( - parameters.remove_penalised_group) + str_ += "{0:s}".format( + group.get_determinant_string( + parameters.remove_penalised_group)) # Add a warning in case of coupled residues if (protein.conformations[conformation].non_covalently_coupled_groups and not protein.options.display_coupled_residues): @@ -190,13 +192,14 @@ def get_summary_section(protein, conformation, parameters): Returns: string """ - str_ = "%s\n" % get_summary_header() + str_ = "{0:s}\n".format(get_summary_header()) # printing pKa summary for residue_type in parameters.write_out_order: for group in protein.conformations[conformation].groups: if group.residue_type == residue_type: - str_ += "%s" % group.get_summary_string( - parameters.remove_penalised_group) + str_ += "{0:s}".format( + group.get_summary_string( + parameters.remove_penalised_group)) return str_ @@ -219,8 +222,8 @@ def get_folding_profile_section(protein, conformation='AVR', """ str_ = get_the_line() str_ += "\n" - str_ += "Free energy of %9s (kcal/mol) as a function" % direction - str_ += " of pH (using %s reference)\n" % reference + str_ += "Free energy of {0:>9s} (kcal/mol) as a function".format(direction) + str_ += " of pH (using {0:s} reference)\n".format(reference) profile, [ph_opt, dg_opt], [dg_min, dg_max], [ph_min, ph_max] = ( protein.get_folding_profile( conformation=conformation, reference=reference, @@ -230,27 +233,28 @@ def get_folding_profile_section(protein, conformation='AVR', else: for (ph, dg) in profile: if ph >= window[0] and ph <= window[1]: - if ph%window[2] < 0.05 or ph%window[2] > 0.95: - str_ += "%6.2lf%10.2lf\n" % (ph, dg) + if ph % window[2] < 0.05 or ph % window[2] > 0.95: + str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, dg) str_ += "\n" if ph_opt is None or dg_opt is None: str_ += "Could not determine pH optimum\n" else: - str_ += "The pH of optimum stability is %4.1lf" % ph_opt - str_ += (" for which the free energy is %6.1lf kcal/mol at 298K\n" - % dg_opt) + str_ += "The pH of optimum stability is {0:>4.1f}".format(ph_opt) + str_ += ( + " for which the free energy is {0:>6.1f} kcal/mol at 298K\n".format( + dg_opt)) if dg_min is None or dg_max is None: str_ += "Could not determine pH values where the free energy" - str_ += " is within 80 %s of minimum\n" % ("%") + str_ += " is within 80 %% of minimum\n" else: str_ += "The free energy is within 80 %% of maximum" - str_ += " at pH %4.1lf to %4.1lf\n" % (dg_min, dg_max) + str_ += " at pH {0:>4.1f} to {1:>4.1f}\n".format(dg_min, dg_max) if ph_min is None or ph_max is None: str_ += "Could not determine the pH-range where the free" str_ += " energy is negative\n\n" else: str_ += "The free energy is negative in the range" - str_ += " %4.1lf - %4.1lf\n\n" % (ph_min, ph_max) + str_ += " {0:>4.1f} - {1:>4.1f}\n\n".format(ph_min, ph_max) return str_ @@ -278,7 +282,7 @@ def get_charge_profile_section(protein, conformation='AVR', _=None): if pi_pro is None or pi_mod is None: str_ += "Could not determine the pI\n\n" else: - str_ += ("The pI is %5.2lf (folded) and %5.2lf (unfolded)\n") + str_ += ("The pI is {0:>5.2f} (folded) and {1:>5.2f} (unfolded)\n") return str_ @@ -290,7 +294,8 @@ def write_jackal_scap_file(mutation_data=None, filename="1xxx_scap.list", """ with open(filename, 'w') as file_: for chain_id, _, res_num, code2 in mutation_data: - str_ = "%s, %d, %s\n" % (chain_id, res_num, code2) + str_ = "{chain:s}, {num:d}, {code:s}\n".format( + chain=chain_id, num=res_num, code=code2) file_.write(str_) @@ -302,9 +307,9 @@ def write_scwrl_sequence_file(sequence, filename="x-ray.seq", _=None): with open(filename, 'w') as file_: start = 0 while len(sequence[start:]) > 60: - file_.write("%s\n" % (sequence[start:start+60])) + file_.write("{0:s}s\n".format(sequence[start:start+60])) start += 60 - file_.write("%s\n" % (sequence[start:])) + file_.write("{0:s}\n".format(sequence[start:])) def get_propka_header(): @@ -314,7 +319,7 @@ def get_propka_header(): string """ today = date.today() - str_ = "propka3.1 %93s\n" % (today) + str_ = "propka3.1 {0:>93s}\n".format(today) str_ += ("---------------------------------------------------------------" "----------------------------------------\n") str_ += ("-- " @@ -468,19 +473,19 @@ def make_interaction_map(name, list_, interaction): for i, group1 in enumerate(list_): for group2 in list_[i:]: if interaction(group1, group2): - res += 'Coupling: %9s - %9s\n' % (group1.label, - group2.label) + res += 'Coupling: {0:>9s} - {1:>9s}\n'.format( + group1.label, group2.label) return res # Name and map header - res = '%s\n%12s' % (name, '') + res = '{0:s}\n{1:>12s}'.format(name, '') for group in list_: - res += '%9s | ' % group.label + res += '{0:>9s} | '.format(group.label) # do the map for group1 in list_: - res += '\n%-12s' % (group1.label) + res += '\n{0:<12s}'.format(group1.label) for group2 in list_: tag = '' if interaction(group1, group2): tag = ' X ' - res += '%10s| '%tag + res += '{0:>10s}| '.format(tag) return res diff --git a/propka/parameters.py b/propka/parameters.py index 4bdff07..960cb81 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -163,7 +163,8 @@ class Parameters: value = float(words[1]) setattr(self, words[0], value) value_sq = value*value - setattr(self, "%s_squared" % words[0], value_sq) + attr = "{0:s}_squared".format(words[0]) + setattr(self, attr, value_sq) def parse_parameter(self, words): """Parse field to parameters. @@ -206,7 +207,7 @@ class Parameters: """Print interaction parameters.""" info('--------------- Model pKa values ----------------------') for k in self.model_pkas: - info('%3s %8.2f' % (k, self.model_pkas[k])) + info('{0:>3s} {1:8.2f}'.format(k, self.model_pkas[k])) info('') info('--------------- Interactions --------------------------') @@ -227,10 +228,10 @@ class Parameters: for group2 in lgroups: fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}" interaction = fmt.format( - grp1=group1, grp2=group2, - mat=self.interaction_matrix[group1][group2], - val1=self.sidechain_cutoffs.get_value(group1, group2)[0], - val2=self.sidechain_cutoffs.get_value(group1, group2)[1]) + grp1=group1, grp2=group2, + mat=self.interaction_matrix[group1][group2], + val1=self.sidechain_cutoffs.get_value(group1, group2)[0], + val2=self.sidechain_cutoffs.get_value(group1, group2)[1]) map_interaction = '' if group2 in map_: for val in map_[group2]: @@ -243,21 +244,21 @@ class Parameters: != self.interaction_matrix[group1][group2]): map_interaction += '* ' if (self.sidechain_cutoffs.get_value(group1, val)[0] - != self.sidechain_cutoffs.get_value(group1, - group2)[0] - or self.sidechain_cutoffs.get_value(group1, - val)[1] - != self.sidechain_cutoffs.get_value(group1, - group2)[1]): + != self.sidechain_cutoffs.get_value( + group1, group2)[0] + or self.sidechain_cutoffs.get_value( + group1, val)[1] + != self.sidechain_cutoffs.get_value( + group1, group2)[1]): map_interaction += '! ' else: map_interaction += ' ' if (len(map_[group2]) == 0 - and (self.sidechain_cutoffs.get_value(group1, - group2)[0] + and (self.sidechain_cutoffs.get_value( + group1, group2)[0] != 3 - or self.sidechain_cutoffs.get_value(group1, - group2)[1] + or self.sidechain_cutoffs.get_value( + group1, group2)[1] != 4)): map_interaction += '? ' info(interaction, map_interaction) @@ -306,30 +307,32 @@ O2 lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - str_ = """ -\\begin{longtable}{lllll} -\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} -\\label{tab:ligand_interaction_parameters}\\\\ - -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endfirsthead - -\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\ -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endhead - -\\midrule -\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\ -\\endfoot - -\\bottomrule -\\endlastfoot - -""" % (self.sidechain_cutoffs.default) + lines = [ + "", + "\\begin{{longtable}}{{lllll}}", + ("\\caption{{Ligand interaction parameters. For interactions not " + "listed, the default value of {0:s} is applied.}}").format( + self.sidechain_cutoffs.default), + "\\label{{tab:ligand_interaction_parameters}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endfirsthead", + "", + "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endhead", + "", + "\\midrule", + "\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\", + "\\endfoot", + "", + "\\bottomrule", + "\\endlastfoot", + ""] + str_ = "\n".join(lines) for group1 in agroups: for group2 in lgroups: if self.interaction_matrix[group1][group2] == '-': @@ -347,7 +350,7 @@ Group1 & Group2 & Interaction & c1 &c2 \\\\ self.sidechain_cutoffs.get_value(group1, group2)[1]) if group1 == group2: break - str_ += ' \\end{longtable}\n' + str_ += ' \\end{{longtable}}\n' info(str_) def print_interactions_latex(self): @@ -357,42 +360,43 @@ Group1 & Group2 & Interaction & c1 &c2 \\\\ 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - str_ = """ -\\begin{longtable}{%s} -\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} -\\label{tab:ligand_interaction_parameters}\\\\ - -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endfirsthead - -\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\ -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endhead - -\\midrule -\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\ -\\endfoot - -\\bottomrule -\\endlastfoot - -""" % ('l'*len(agroups), self.sidechain_cutoffs.default) + lines = [ + "", + "\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)), + ("\\caption{{Ligand interaction parameters. For interactions not " + "listed, the default value of {0:s} is applied.}}").format( + str(self.sidechain_cutoffs.default)), + "\\label{{tab:ligand_interaction_parameters}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endfirsthead", + "", + "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endhead", + "", + "\\midrule", + "\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\", + "\\endfoot", + "", + "\\bottomrule", + "\\endlastfoot", + "" + ] + str_ = "\n".join(lines) for group1 in agroups: for group2 in agroups: - str_ += ('%3s & %3s & %1s & %4s & %4s\\\\ \n' - % (group1, group2, - self.interaction_matrix[group1][group2], - self.sidechain_cutoffs.get_value( - group1, group2)[0], - self.sidechain_cutoffs.get_value( - group1, group2)[1])) + fmt = '{g1:>3s} & {g2:>3s} & {mat:1s} & {val1:>4s} & {val2:>4s}\\\\ \n' + str_ += fmt.format( + group1, group2, self.interaction_matrix[group1][group2], + str(self.sidechain_cutoffs.get_value(group1, group2)[0]), + str(self.sidechain_cutoffs.get_value(group1, group2)[1])) if group1 == group2: break - str_ += ' \\end{longtable}\n' + str_ += ' \\end{{longtable}}\n' info(str_) @@ -450,7 +454,8 @@ class InteractionMatrix: group: group to get """ if group not in self.dictionary.keys(): - str_ = '%s not found in interaction matrix %s' % (group, self.name) + str_ = '{0:s} not found in interaction matrix {1:s}'.format( + group, self.name) raise KeyError(str_) return self.dictionary[group] @@ -465,12 +470,12 @@ class InteractionMatrix: def __str__(self): str_ = ' ' for key in self.ordered_keys: - str_ += '%3s ' % key + str_ += '{0:>3s} '.format(key) str_ += '\n' for key1 in self.ordered_keys: - str_ += '%3s ' % key1 + str_ += '{0:>3s} '.format(key1) for key2 in self.ordered_keys: - str_ += '%3s ' % self[key1][key2] + str_ += '{0:>3s} '.format(self[key1][key2]) str_ += '\n' return str_ @@ -517,8 +522,9 @@ class PairwiseMatrix: """ if key1 in self.dictionary and key2 in self.dictionary[key1]: if key1 != key2: - str_ = ('Parameter value for %s, %s defined more than once' - % (key1, key2)) + str_ = ( + 'Parameter value for {0:s}, {1:s} defined more ' + 'than once'.format(key1, key2)) warning(str_) if not key1 in self.dictionary: self.dictionary[key1] = {} @@ -547,7 +553,8 @@ class PairwiseMatrix: matrix information """ if group not in self.dictionary.keys(): - str_ = '%s not found in interaction matrix %s' % (group, self.name) + str_ = '{0:s} not found in interaction matrix {1:s}'.format( + group, self.name) raise KeyError(str_) return self.dictionary[group] @@ -563,5 +570,6 @@ class PairwiseMatrix: str_ = '' for key1 in self.keys(): for key2 in self[key1].keys(): - str_ += '%s %s %s\n' % (key1, key2, self[key1][key2]) + str_ += '{0:s} {1:s} {2:s}\n'.format( + key1, key2, self[key1][key2]) return str_ diff --git a/propka/pdb.py b/propka/pdb.py index 7d33299..0074938 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -58,23 +58,25 @@ def protein_precheck(conformations, names): atoms_by_residue[res_id] = [atom] for res_id, res_atoms in atoms_by_residue.items(): res_name = res_atoms[0].res_name - residue_label = '%3s%5s'%(res_name, res_id) + residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id) # ignore ligand residues if res_name not in EXPECTED_ATOM_NUMBERS: continue # check for c-terminal if 'C-' in [a.terminal for a in res_atoms]: if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: - str_ = ("Unexpected number (%d) of atoms in residue %s " - "in conformation %s" % (len(res_atoms), - residue_label, name)) + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) warning(str_) continue # check number of atoms in residue if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: - str_ = ('Unexpected number (%d) of atoms in residue %s ' - 'in conformation %s' % (len(res_atoms), - residue_label, name)) + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) warning(str_) @@ -86,7 +88,8 @@ def resid_from_atom(atom): Returns string """ - return '%4d %s %s' % (atom.res_num, atom.chain_id, atom.icode) + return '{0:>4d} {1:s} {2:s}'.format( + atom.res_num, atom.chain_id, atom.icode) def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, @@ -136,7 +139,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, alt_conf_tag = chr(ord(alt_conf_tag)+16) if alt_conf_tag == ' ': alt_conf_tag = 'A' - conformation = '%d%s'%(model, alt_conf_tag) + conformation = '{0:d}{1:s}'.format(model, alt_conf_tag) # set the terminal if tag == 'ATOM ': if (residue_name.strip() == 'N' @@ -190,7 +193,7 @@ def write_mol2_for_atoms(atoms, filename): filename: name of file """ # TODO - header needs to be converted to format string - header = '@MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n' + header = '@MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n' atoms_section = '@ATOM\n' for i, atom in enumerate(atoms): atoms_section += atom.make_mol2_line(i+1) @@ -200,15 +203,16 @@ def write_mol2_for_atoms(atoms, filename): for j, atom2 in enumerate(atoms, i+1): if atom1 in atom2.bonded_atoms: type_ = get_bond_order(atom1, atom2) - bonds_section += '%7d %7d %7d %7s\n' % (id_, i+1, j+1, type_) + bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format( + id_, i+1, j+1, type_) id_ += 1 substructure_section = '@SUBSTRUCTURE\n\n' if len(atoms) > 0: - substructure_section = ('@SUBSTRUCTURE\n%-7d %10s %7d\n' - % (atoms[0].res_num, atoms[0].res_name, - atoms[0].numb)) + substructure_section = ( + '@SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format( + atoms[0].res_num, atoms[0].res_name, atoms[0].numb)) out = propka.lib.open_file_for_writing(filename) - out.write(header % (len(atoms), id_-1)) + out.write(header.format(natom=len(atoms), id=id_-1)) out.write(atoms_section) out.write(bonds_section) out.write(substructure_section) @@ -232,7 +236,7 @@ def get_bond_order(atom1, atom2): if '.ar' in atom2.sybyl_type: pi_electrons2 -= 1 if pi_electrons1 > 0 and pi_electrons2 > 0: - type_ = '%d' % (min(pi_electrons1, pi_electrons2)+1) + type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1) if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: type_ = 'ar' return type_ @@ -247,7 +251,7 @@ def write_input(molecular_container, filename): """ out = propka.lib.open_file_for_writing(filename) for conformation_name in molecular_container.conformation_names: - out.write('MODEL %s\n' % conformation_name) + out.write('MODEL {0:s}\n'.format(conformation_name)) # write atoms for atom in molecular_container.conformations[conformation_name].atoms: out.write(atom.make_input_line()) diff --git a/propka/protonate.py b/propka/protonate.py index 8218176..cd0f6c8 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -71,7 +71,7 @@ class Protonate: """ # atom is a protein atom if atom.type == 'atom': - key = '%3s-%s' % (atom.res_name, atom.name) + key = '{0:3s}-{1:s}'.format(atom.res_name, atom.name) if atom.terminal: debug(atom.terminal) key = atom.terminal From 87347a7d60acf3f8a355dc386b7c980019e0c605 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Thu, 28 May 2020 20:31:03 -0700 Subject: [PATCH 61/65] Modernize print statements with str.format() --- propka/protonate.py | 80 ++++++++++++++++++---------------- propka/run.py | 2 +- propka/vector_algebra.py | 19 ++++---- propka/version.py | 2 +- tests/test_basic_regression.py | 26 ++++++----- 5 files changed, 70 insertions(+), 59 deletions(-) diff --git a/propka/protonate.py b/propka/protonate.py index cd0f6c8..bd74894 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -113,7 +113,7 @@ class Protonate: i = 1 for bonded in heavy_atom.bonded_atoms: if bonded.element == 'H': - bonded.name += '%d' % i + bonded.name += str(i) i += 1 def set_number_of_protons_to_add(self, atom): @@ -125,15 +125,16 @@ class Protonate: debug('*'*10) debug('Setting number of protons to add for', atom) atom.number_of_protons_to_add = 8 - debug(' %4d' % 8) + debug(" 8") atom.number_of_protons_to_add -= self.valence_electrons[atom.element] - debug('Valence eletrons: %4d' % -self.valence_electrons[atom.element]) + debug('Valence electrons: {0:>4d}'.format( + -self.valence_electrons[atom.element])) atom.number_of_protons_to_add -= len(atom.bonded_atoms) - debug('Number of bonds: %4d' % -len(atom.bonded_atoms)) + debug('Number of bonds: {0:>4d}'.format(-len(atom.bonded_atoms))) atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds - debug('Pi electrons: %4d' % -atom.num_pi_elec_2_3_bonds) + debug('Pi electrons: {0:>4d}'.format(-atom.num_pi_elec_2_3_bonds)) atom.number_of_protons_to_add += int(atom.charge) - debug('Charge: %4.1f' % atom.charge) + debug('Charge: {0:>4.1f}'.format(atom.charge)) debug('-'*10) debug(atom.number_of_protons_to_add) @@ -149,35 +150,37 @@ class Protonate: debug('='*10) debug('Setting steric number and lone pairs for', atom) atom.steric_number = 0 - debug('%65s: %4d' % ('Valence electrons', - self.valence_electrons[atom.element])) + debug('{0:>65s}: {1:>4d}'.format( + 'Valence electrons', self.valence_electrons[atom.element])) atom.steric_number += self.valence_electrons[atom.element] - debug('%65s: %4d' % ('Number of bonds', - len(atom.bonded_atoms))) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of bonds', len(atom.bonded_atoms))) atom.steric_number += len(atom.bonded_atoms) - debug('%65s: %4d' % ('Number of hydrogen atoms to add', - atom.number_of_protons_to_add)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of hydrogen atoms to add', atom.number_of_protons_to_add)) atom.steric_number += atom.number_of_protons_to_add - debug('%65s: %4d' % ('Number of pi-electrons in double ' - 'and triple bonds(-)', - atom.num_pi_elec_2_3_bonds)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of pi-electrons in double and triple bonds(-)', + atom.num_pi_elec_2_3_bonds)) atom.steric_number -= atom.num_pi_elec_2_3_bonds - debug('%65s: %4d' % ('Number of pi-electrons in conjugated double and ' - 'triple bonds(-)', - atom.num_pi_elec_conj_2_3_bonds)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of pi-electrons in conjugated double and triple bonds(-)', + atom.num_pi_elec_conj_2_3_bonds)) atom.steric_number -= atom.num_pi_elec_conj_2_3_bonds - debug('%65s: %4d' % ('Number of donated co-ordinated bonds', 0)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of donated co-ordinated bonds', 0)) atom.steric_number += 0 - debug('%65s: %4.1f' % ('Charge(-)', atom.charge)) + debug('{0:>65s}: {1:>4.1f}'.format( + 'Charge(-)', atom.charge)) atom.steric_number -= atom.charge atom.steric_number = math.floor(atom.steric_number/2.0) - atom.number_of_lone_pairs = (atom.steric_number - - len(atom.bonded_atoms) - - atom.number_of_protons_to_add) + atom.number_of_lone_pairs = ( + atom.steric_number-len(atom.bonded_atoms)-atom.number_of_protons_to_add) debug('-'*70) - debug('%65s: %4d' % ('Steric number', atom.steric_number)) - debug('%65s: %4d' % ('Number of lone pairs', - atom.number_of_lone_pairs)) + debug('{0:>65s}: {1:>4d}'.format( + 'Steric number', atom.steric_number)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of lone pairs', atom.number_of_lone_pairs)) atom.steric_num_lone_pairs_set = True def add_protons(self, atom): @@ -191,8 +194,8 @@ class Protonate: if atom.steric_number in list(self.protonation_methods.keys()): self.protonation_methods[atom.steric_number](atom) else: - warning('Do not have a method for protonating', - atom, '(steric number: %d)' % atom.steric_number) + warning('Do not have a method for protonating', atom, + '(steric number: {0:d})'.format(atom.steric_number)) def trigonal(self, atom): """Add hydrogens in trigonal geometry. @@ -200,7 +203,7 @@ class Protonate: Args: atom: atom to protonate """ - debug('TRIGONAL - %d bonded atoms' % len(atom.bonded_atoms)) + debug('TRIGONAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms))) rot_angle = math.radians(120.0) cvec = Vector(atom1=atom) # 0 bonds @@ -258,7 +261,8 @@ class Protonate: Args: atom: atom to protonate. """ - debug('TETRAHEDRAL - %d bonded atoms' % len(atom.bonded_atoms)) + debug( + 'TETRAHEDRAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms))) # TODO - might be good to move tetrahedral angle to constant rot_angle = math.radians(109.5) cvec = Vector(atom1=atom) @@ -304,7 +308,7 @@ class Protonate: new_h = propka.atom.Atom() new_h.set_property( numb=None, - name='H%s' % atom.name[1:], + name='H{0:s}'.format(atom.name[1:]), res_name=atom.res_name, chain_id=atom.chain_id, res_num=atom.res_num, @@ -327,14 +331,15 @@ class Protonate: atom.number_of_protons_to_add -= 1 atom.conformation_container.add_atom(new_h) # update names of all protons on this atom - new_h.residue_label = "%-3s%4d%2s" % (new_h.name, new_h.res_num, - new_h.chain_id) + new_h.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format( + new_h.name, new_h.res_num, new_h.chain_id) no_protons = atom.count_bonded_elements('H') if no_protons > 1: i = 1 for proton in atom.get_bonded_elements('H'): - proton.name = 'H%s%d' % (atom.name[1:], i) - proton.residue_label = "%-3s%4d%2s" % ( + proton.name = 'H{0:s}{1:d}'.format( + atom.name[1:], i) + proton.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format( proton.name, proton.res_num, proton.chain_id) i += 1 debug('added', new_h, 'to', atom) @@ -352,8 +357,9 @@ class Protonate: if element in list(self.bond_lengths.keys()): dist = self.bond_lengths[element] else: - str_ = ('Bond length for %s not found, using the standard value ' - 'of %f' % (element, dist)) + str_ = ( + 'Bond length for {0:s} not found, using the standard value ' + 'of {1:f}'.format(element, dist)) warning(str_) bvec = bvec.rescale(dist) return bvec diff --git a/propka/run.py b/propka/run.py index deeea27..e851309 100644 --- a/propka/run.py +++ b/propka/run.py @@ -34,7 +34,7 @@ def single(pdbfile, optargs=None): options = loadOptions(*optargs) pdbfile = options.filenames.pop(0) if len(options.filenames) > 0: - _LOGGER.warning("Ignoring filenames: %s", options.filenames) + _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames)) my_molecule = Molecular_container(pdbfile, options) my_molecule.calculate_pka() my_molecule.write_pka() diff --git a/propka/vector_algebra.py b/propka/vector_algebra.py index cfa48a9..1303de5 100644 --- a/propka/vector_algebra.py +++ b/propka/vector_algebra.py @@ -58,7 +58,7 @@ class Vector: elif type(other) in [int, float]: return Vector(self.x * other, self.y * other, self.z * other) else: - info('%s not supported' % type(other)) + info('{0:s} not supported'.format(type(other))) raise TypeError def __rmul__(self, other): @@ -85,7 +85,8 @@ class Vector: return math.sqrt(self.sq_length()) def __str__(self): - return '%10.4f %10.4f %10.4f'%(self.x, self.y, self.z) + return '{0:>10.4f} {1:>10.4f} {2:>10.4f}'.format( + self.x, self.y, self.z) def __repr__(self): return '' @@ -100,9 +101,7 @@ class Vector: def rescale(self, new_length): """ Rescale vector to new length while preserving direction """ frac = new_length/(self.length()) - res = Vector(xi=self.x*frac, - yi=self.y*frac, - zi=self.z*frac) + res = Vector(xi=self.x*frac, yi=self.y*frac, zi=self.z*frac) return res @@ -296,7 +295,8 @@ class MultiVector: keys2 = get_sorted_configurations(atom2.configurations.keys()) if self.keys != keys2: str_ = ('Cannot make multi vector: Atomic configurations ' - 'mismatch for\n %s\n %s\n' % (atom1, atom2)) + 'mismatch for\n {0:s}\n {1:s}\n'.format( + atom1, atom2)) raise KeyError(str_) for key in self.keys: atom1.setConfiguration(key) @@ -314,7 +314,7 @@ class MultiVector: def __str__(self): res = '' for i, key in enumerate(self.keys): - res += '%s %s\n' % (key, self.vectors[i]) + res += '{0:s} {1:s}\n'.format(key, self.vectors[i]) return res def do_job(self, job): @@ -350,8 +350,9 @@ class MultiVector: for i in range(len(self.vectors)): self.result.vectors.append( # TODO - eliminate eval() or entire class - eval('self.vectors[%d] %s other.vectors[%d]' - % (i, operation, i))) + eval( + 'self.vectors[{0:d}] {1:s} other.vectors[{2:d}]'.format( + i, operation, i))) self.result.keys.append(self.keys[i]) def __add__(self, other): diff --git a/propka/version.py b/propka/version.py index 8ce368d..9ff5d87 100644 --- a/propka/version.py +++ b/propka/version.py @@ -31,7 +31,7 @@ class Version: Raises: NotImplementedError """ - err = "Called an empty Version function with args %s" % args + err = "Called an empty Version function with args {0:s}".format(args) raise NotImplementedError(err) def calculate_desolvation(self, group): diff --git a/tests/test_basic_regression.py b/tests/test_basic_regression.py index 8a4cbf0..cd8036e 100644 --- a/tests/test_basic_regression.py +++ b/tests/test_basic_regression.py @@ -48,8 +48,9 @@ def get_test_dirs(): if test_path.is_dir(): path_dict[key] = test_path else: - errstr = ("Can't find %s test files in %s" - % (key, [TEST_DIR / path, path])) + errstr = ( + "Can't find {0:s} test files in {1:s}".format( + key, [TEST_DIR / path, path])) raise FileNotFoundError(errstr) return path_dict @@ -65,8 +66,9 @@ def run_propka(options, pdb_path, tmp_path): options += [str(pdb_path)] args = propka.lib.loadOptions(options) try: - _LOGGER.warning("Working in tmpdir %s because of PROPKA file output; " - "need to fix this.", tmp_path) + _LOGGER.warning( + "Working in tmpdir {0:s} because of PROPKA file output; " + "need to fix this.".format(str(tmp_path))) cwd = Path.cwd() os.chdir(tmp_path) molecule = propka.molecular_container.Molecular_container( @@ -93,7 +95,7 @@ def compare_output(pdb, tmp_path, ref_path): ref_data.append(float(line)) test_data = [] - pka_path = Path(tmp_path) / ("%s.pka" % pdb) + pka_path = Path(tmp_path) / ("{0:s}.pka".format(pdb)) with open(pka_path, "rt") as pka_file: at_pka = False for line in pka_file: @@ -106,8 +108,9 @@ def compare_output(pdb, tmp_path, ref_path): match = re.search(r'([0-9]+\.[0-9]+)', line) value = float(match.group(0)) test_data.append(value) - errstr = ("Error exceeds maximum allowed value (%d decimal places)" - % MAX_ERR_DECIMALS) + errstr = ( + "Error exceeds maximum allowed value ({0:d} decimal places)".format( + MAX_ERR_DECIMALS)) assert_almost_equal( test_data, ref_data, decimal=MAX_ERR_DECIMALS, err_msg=errstr, verbose=True) @@ -126,17 +129,18 @@ def compare_output(pdb, tmp_path, ref_path): def test_regression(pdb, options, tmp_path): """Basic regression test of PROPKA functionality.""" path_dict = get_test_dirs() - ref_path = path_dict["results"] / ("%s.dat" % pdb) + ref_path = path_dict["results"] / ("{0:s}.dat".format(pdb)) if ref_path.is_file(): ref_path = ref_path.resolve() else: - _LOGGER.warning("Missing results file for comparison: %s", ref_path) + _LOGGER.warning("Missing results file for comparison: {0:s}".format( + str(ref_path))) ref_path = None - pdb_path = path_dict["pdbs"] / ("%s.pdb" % pdb) + pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb)) if pdb_path.is_file(): pdb_path = pdb_path.resolve() else: - errstr = "Missing PDB file: %s" % pdb_path + errstr = "Missing PDB file: {0:s}".format(pdb_path) raise FileNotFoundError(errstr) tmp_path = Path(tmp_path).resolve() From b54190ef279b9acd98f6b52cfc861be14fd94e84 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Fri, 29 May 2020 20:08:41 -0700 Subject: [PATCH 62/65] Fix complicated string formatting. PEP8 be damned! https://www.python.org/dev/peps/pep-0008/#a-foolish-consistency-is-the-hobgoblin-of-little-minds Addresses https://github.com/jensengroup/propka-3.1/pull/40#discussion_r432302025 --- propka/output.py | 125 +++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 76 deletions(-) diff --git a/propka/output.py b/propka/output.py index f533316..b8809f9 100644 --- a/propka/output.py +++ b/propka/output.py @@ -319,40 +319,26 @@ def get_propka_header(): string """ today = date.today() - str_ = "propka3.1 {0:>93s}\n".format(today) - str_ += ("---------------------------------------------------------------" - "----------------------------------------\n") - str_ += ("-- " - " --\n") - str_ += ("-- PROPKA: A PROTEIN PKA " - "PREDICTOR --\n") - str_ += ("-- " - " --\n") - str_ += ("-- VERSION 1.0, 04/25/2004, " - "IOWA CITY --\n") - str_ += ("-- BY HUI LI " - " --\n") - str_ += ("-- " - " --\n") - str_ += ("-- VERSION 2.0, 11/05/2007, IOWA " - "CITY/COPENHAGEN --\n") - str_ += ("-- BY DELPHINE C. BAS AND DAVID " - "M. ROGERS --\n") - str_ += ("-- " - " --\n") - str_ += ("-- VERSION 3.0, 01/06/2011, " - "COPENHAGEN --\n") - str_ += ("-- BY MATS H.M. OLSSON AND CHRESTEN " - "R. SONDERGARD --\n") - str_ += ("-- " - " --\n") - str_ += ("-- VERSION 3.1, 07/01/2011, " - "COPENHAGEN --\n") - str_ += ("-- BY CHRESTEN R. SONDERGARD AND " - "MATS H.M. OLSSON --\n") - str_ += ("---------------------------------------------------------------" - "----------------------------------------\n") - str_ += ("\n") + str_ = "propka3.1 {0!s:>93s}\n".format(today) + str_ += """ +------------------------------------------------------------------------------- +-- -- +-- PROPKA: A PROTEIN PKA PREDICTOR -- +-- -- +-- VERSION 1.0, 04/25/2004, IOWA CITY -- +-- BY HUI LI -- +-- -- +-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN -- +-- BY DELPHINE C. BAS AND DAVID M. ROGERS -- +-- -- +-- VERSION 3.0, 01/06/2011, COPENHAGEN -- +-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD -- +-- -- +-- VERSION 3.1, 07/01/2011, COPENHAGEN -- +-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON -- +-- -- +------------------------------------------------------------------------------- +""" return str_ @@ -362,39 +348,29 @@ def get_references_header(): Returns: string """ - str_ = "" - str_ += ("---------------------------------------------------------------" - "----------------------------------------\n") - str_ += (" References:\n") - str_ += ("\n") - str_ += (" Very Fast Empirical Prediction and Rationalization of " - "Protein pKa Values\n") - str_ += (" Hui Li, Andrew D. Robertson and Jan H. Jensen\n") - str_ += (" PROTEINS: Structure, Function, and Bioinformatics 61:704-721" - " (2005)\n") - str_ += (" \n") - str_ += (" Very Fast Prediction and Rationalization of pKa Values for " - "Protein-Ligand Complexes\n") - str_ += (" Delphine C. Bas, David M. Rogers and Jan H. Jensen\n") - str_ += (" PROTEINS: Structure, Function, and Bioinformatics 73:765-" - "783 (2008)\n") - str_ += (" \n") - str_ += (" PROPKA3: Consistent Treatment of Internal and Surface " - "Residues in Empirical pKa predictions\n") - str_ += (" Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, " - "and Jan H. Jensen\n") - str_ += (" Journal of Chemical Theory and Computation, 7(2):525-537 " - "(2011)\n") - str_ += (" \n") - str_ += (" Improved Treatment of Ligands and Coupling Effects in " - "Empirical Calculation\n") - str_ += (" and Rationalization of pKa Values\n") - str_ += (" Chresten R. Sondergaard, Mats H.M. Olsson, Michal " - "Rostkowski, and Jan H. Jensen\n") - str_ += (" Journal of Chemical Theory and Computation, (2011)\n") - str_ += (" \n") - str_ += ("--------------------------------------------------------------" - "-----------------------------------------\n") + str_ = """ +------------------------------------------------------------------------------- +References: + +Very Fast Empirical Prediction and Rationalization of Protein pKa Values. +Hui Li, Andrew D. Robertson and Jan H. Jensen. PROTEINS: Structure, Function, +and Bioinformatics. 61:704-721 (2005) + +Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand +Complexes. Delphine C. Bas, David M. Rogers and Jan H. Jensen. PROTEINS: +Structure, Function, and Bioinformatics 73:765-783 (2008) + +PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical +pKa predictions. Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, +and Jan H. Jensen. Journal of Chemical Theory and Computation, 7(2):525-537 +(2011) + +Improved Treatment of Ligands and Coupling Effects in Empirical Calculation +and Rationalization of pKa Values. Chresten R. Sondergaard, Mats H.M. Olsson, +Michal Rostkowski, and Jan H. Jensen. Journal of Chemical Theory and +Computation, (2011) +------------------------------------------------------------------------------- +""" return str_ @@ -416,15 +392,12 @@ def get_determinants_header(): Returns: string """ - str_ = "" - str_ += ("--------- ----- ------ --------------------- " - "-------------- -------------- --------------\n") - str_ += (" DESOLVATION EFFECTS " - "SIDECHAIN BACKBONE COULOMBIC \n") - str_ += (" RESIDUE pKa BURIED REGULAR RE " - "HYDROGEN BOND HYDROGEN BOND INTERACTION \n") - str_ += ("--------- ----- ------ --------- --------- " - "-------------- -------------- --------------\n") + str_ = """ +--------- ----- ------ --------------------- -------------- -------------- -------------- + DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC + RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION +--------- ----- ------ --------- --------- -------------- -------------- -------------- +""" return str_ From 83a7099d79999b49e880fdf854010c8f0b0ab2fd Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Fri, 29 May 2020 20:37:47 -0700 Subject: [PATCH 63/65] Fix my formatting mistakes. Python formatting makes me miss K&R C. --- propka/output.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/propka/output.py b/propka/output.py index b8809f9..61f001b 100644 --- a/propka/output.py +++ b/propka/output.py @@ -203,10 +203,9 @@ def get_summary_section(protein, conformation, parameters): return str_ -def get_folding_profile_section(protein, conformation='AVR', - direction="folding", reference="neutral", - window=[0., 14., 1.0], _=False, - __=None): +def get_folding_profile_section( + protein, conformation='AVR', direction="folding", reference="neutral", + window=[0., 14., 1.0], _=False, __=None): """Returns string with the folding profile section of the results. Args: @@ -245,9 +244,9 @@ def get_folding_profile_section(protein, conformation='AVR', dg_opt)) if dg_min is None or dg_max is None: str_ += "Could not determine pH values where the free energy" - str_ += " is within 80 %% of minimum\n" + str_ += " is within 80 % of minimum\n" else: - str_ += "The free energy is within 80 %% of maximum" + str_ += "The free energy is within 80 % of maximum" str_ += " at pH {0:>4.1f} to {1:>4.1f}\n".format(dg_min, dg_max) if ph_min is None or ph_max is None: str_ += "Could not determine the pH-range where the free" From a716c89a3641a7bb8d07b445c1fbb6e03685c93c Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Fri, 29 May 2020 20:49:47 -0700 Subject: [PATCH 64/65] Remove unused argument from Atom() constructor. --- propka/atom.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index 40e15b2..1e87af2 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -29,12 +29,11 @@ STR_FMT = ( class Atom(object): """Atom class - contains all atom information found in the PDB file""" - def __init__(self, line=None, _=False): + def __init__(self, line=None): """Initialize Atom object. Args: line: Line from a PDB file to set properties of atom. - _: TODO - this does not appear to be used. Can we remove it? """ self.occ = None self.numb = None From d7147799d815f6f273b4814419cd8d5955ad4569 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Fri, 29 May 2020 20:55:32 -0700 Subject: [PATCH 65/65] Python formatting is annoying. I sometimes miss C -- but only with output formatting. C++ cout marks some decline of civilization, I think... Addresses https://github.com/jensengroup/propka-3.1/pull/40#discussion_r432313434 --- propka/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/propka/group.py b/propka/group.py index 9251275..465c195 100644 --- a/propka/group.py +++ b/propka/group.py @@ -473,7 +473,7 @@ class Group: str_ += '*' else: str_ += ' ' - str_ += " {0:4d}{1:>2s} ".format(int(100.0*self.buried), "%%") + str_ += " {0:4d}{1:>2s} ".format(int(100.0*self.buried), "%") str_ += " {0:6.2f} {1:4d}".format( self.energy_volume, int(self.num_volume)) str_ += " {0:6.2f} {1:4d}".format(