From edd25afad34620b159377c3f448b6efa63f5e334 Mon Sep 17 00:00:00 2001 From: Mike Beachy Date: Thu, 20 Dec 2012 11:48:31 -0500 Subject: [PATCH 1/6] add a reference to the math.ceil note --- propka/bonds.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/propka/bonds.py b/propka/bonds.py index c0f7e71..e199858 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -354,8 +354,9 @@ class bondmaker: #print('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen)) # how many boxes do we need in each dimension? - # NOTE: math.ceil() returns an int in python3 and a float in python2, - # so we need to cast it to int for range() to work. + # NOTE: math.ceil() returns an int in python3 and a float in + # python2, so we need to convert it to an int for range() to work in + # both versions. See PEP 3141. self.no_box_x = max(1, int(math.ceil(xlen/box_size))) self.no_box_y = max(1, int(math.ceil(ylen/box_size))) self.no_box_z = max(1, int(math.ceil(zlen/box_size))) From 61bb87524818f1c24b48b9a737ba044fefce2a91 Mon Sep 17 00:00:00 2001 From: Mike Beachy Date: Sat, 22 Feb 2014 17:33:18 -0500 Subject: [PATCH 2/6] add test framework - add the ability to run tests from the top level directory via 'python setup.py test' - translate runtest.py into a test that runs under this framework - add a __main__ section to Tests/runtest.py and change its indent to 4 spaces; make no other changes to this script --- Tests/__init__.py | 0 Tests/pkacalc_test.py | 68 +++++++++++++++++++++++++++++++++++++++++++ Tests/runtest.py | 64 +++++++++++++++++++--------------------- setup.py | 1 + 4 files changed, 99 insertions(+), 34 deletions(-) create mode 100644 Tests/__init__.py create mode 100644 Tests/pkacalc_test.py diff --git a/Tests/__init__.py b/Tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Tests/pkacalc_test.py b/Tests/pkacalc_test.py new file mode 100644 index 0000000..04f896e --- /dev/null +++ b/Tests/pkacalc_test.py @@ -0,0 +1,68 @@ + +import os +import re +from subprocess import call +import sys +import unittest + +# Setting this up as a direct translation of the original runtest.py script +# that will be run as part of 'python setup.py test'. This takes on the +# order of 30s to run. + +class SystemTest(unittest.TestCase): + """ + Run the program and compare against reference results. + """ + def test_pka_calc(self): + pdbs = ['1FTJ-Chain-A', + '1HPX', + '4DFR'] + + test_dir = os.path.dirname(__file__) + base_dir = os.path.dirname(test_dir) + + executable = os.path.join(base_dir, "scripts", "propka31.py") + + env = { "PYTHONPATH" : base_dir } + + for pdb in pdbs: + input_filename = os.path.join(test_dir, "pdb", pdb + ".pdb") + output_filename = os.path.join(test_dir, pdb + ".out") + + output_file = open(output_filename, "w") + call([sys.executable, executable, input_filename], + stdout=output_file, env=env) + output_file.close() + + # Check pka predictions. + ref = open(os.path.join(test_dir, "results", pdb + ".dat")) + output = open(output_filename) + + atpka = False + errors = [] + for line in output: + if not atpka: + # Start testing pka values. + if "model-pKa" in line: + atpka = True + continue + + m = re.search('([0-9]+\.[0-9]+)', line) + if not m: + break + + expected_value = float(ref.readline()) + value = float(m.group(0)) + + if(value != expected_value): + identity = line[:m.start()].strip() + errors.append("%12s %8.2f %8.2f" % + (identity, expected_value, value)) + + ref.close() + output.close() + + if errors: + error_header = " Group Expected Calculated\n" + self.Fail("Unexpected pKa values:\n" + error_header + + "\n".join(errors)) diff --git a/Tests/runtest.py b/Tests/runtest.py index e73517a..3ea030c 100755 --- a/Tests/runtest.py +++ b/Tests/runtest.py @@ -9,43 +9,39 @@ from subprocess import call import os, re import sys -pdbs = ['1FTJ-Chain-A', - '1HPX', - '4DFR'] - -for pdb in pdbs: - print('') - print('RUNNING '+pdb) - - # Run pka calculation - call([sys.executable, '../scripts/propka31.py','pdb/'+pdb+'.pdb'], stdout = open(pdb+'.out', 'w+')) - - # Test pka predictiona - result = open('results/'+pdb+'.dat','r') - atpka = False - for line in open(pdb+'.pka', 'r').readlines(): - if not atpka: - if "model-pKa" in line: - # test pka - atpka = True - continue - else: - continue - if "-" in line: - # done testing - atpka = False - continue - - r = float(result.readline()) - m = re.search('([0-9]+\.[0-9]+)', line) - - if(float(m.group(0)) != r): - print(" ERR:") - print(line) - print(" "+"should be: "+str(r)) +if __name__ == "__main__": + pdbs = ['1FTJ-Chain-A', + '1HPX', + '4DFR'] + for pdb in pdbs: + print('') + print('RUNNING '+pdb) + # Run pka calculation + call([sys.executable, '../scripts/propka31.py','pdb/'+pdb+'.pdb'], stdout = open(pdb+'.out', 'w+')) + # Test pka predictiona + result = open('results/'+pdb+'.dat','r') + atpka = False + for line in open(pdb+'.pka', 'r').readlines(): + if not atpka: + if "model-pKa" in line: + # test pka + atpka = True + continue + else: + continue + if "-" in line: + # done testing + atpka = False + continue + r = float(result.readline()) + m = re.search('([0-9]+\.[0-9]+)', line) + if(float(m.group(0)) != r): + print(" ERR:") + print(line) + print(" "+"should be: "+str(r)) diff --git a/setup.py b/setup.py index 1bee359..94e7ea4 100644 --- a/setup.py +++ b/setup.py @@ -48,4 +48,5 @@ using the tutorial http://propka.ki.ku.dk/~luca/wiki/index.php/PROPKA_3.1_Tutori ], }, zip_safe=True, + test_suite="Tests", ) From e0ed5da44be83dae4e384a2fca59a4a1089c0054 Mon Sep 17 00:00:00 2001 From: Mike Beachy Date: Sat, 22 Feb 2014 17:42:49 -0500 Subject: [PATCH 3/6] handle structures with 100,000 or more atoms - the pdb format requires a 5-character field for the atom number; the hybrid-36 format allows up to a ridiculous number of atoms --- Tests/hybrid36.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++ propka/atom.py | 3 ++- propka/hybrid36.py | 52 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 Tests/hybrid36.py create mode 100644 propka/hybrid36.py diff --git a/Tests/hybrid36.py b/Tests/hybrid36.py new file mode 100644 index 0000000..4429e07 --- /dev/null +++ b/Tests/hybrid36.py @@ -0,0 +1,57 @@ +import unittest + +import propka.hybrid36 as hybrid36 + +class Hybrid36Test(unittest.TestCase): + def testDecode(self): + test_values = { + "99999": 99999, + "A0000": 100000, + "0": 0, + "9": 9, + "A": 10, + " ZZZZY": 43770014, + "ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000 + "a0000": 43770016, + "zzzzz": 87440031, + "zzzzy": 87440030, + "99": 99, + "A0": 100, + "ZZ": 1035, + "zz": 1971, + "-99999": -99999, + "-A0000": -100000, + "-0": 0, + "-9": -9, + "-A": -10, + "-ZZZZY": -43770014, + "-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000 + "-a0000": -43770016, + "-zzzzz": -87440031, + "-zzzzy": -87440030, + "-99": -99, + "-A0": -100, + "-ZZ": -1035, + "-zz": -1971, + "PROPKA": 954495146, + "A001Z": 100071, + "B0000": 1779616, + } + + for k, v in test_values.iteritems(): + self.assertEqual(hybrid36.decode(k), v) + + def testErrors(self): + test_values = [ + "99X99", + "X9-99", + "XYZa", + "", + "-", + "!NotOk", + ] + + for v in test_values: + with self.assertRaises(ValueError) as e: + hybrid36.decode(v) + self.assertTrue(v in str(e.exception)) diff --git a/propka/atom.py b/propka/atom.py index b9abec4..31a83dc 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -4,6 +4,7 @@ from __future__ import print_function import string, propka.lib, propka.group +from . import hybrid36 class Atom: """ @@ -68,7 +69,7 @@ class Atom: if line: self.name = line[12:16].strip() - self.numb = int( line[ 6:11].strip() ) + self.numb = int( hybrid36.decode(line[ 6:11]) ) self.x = float( line[30:38].strip() ) self.y = float( line[38:46].strip() ) self.z = float( line[46:54].strip() ) diff --git a/propka/hybrid36.py b/propka/hybrid36.py new file mode 100644 index 0000000..f5b96af --- /dev/null +++ b/propka/hybrid36.py @@ -0,0 +1,52 @@ +import string + +_hybrid36_upper_chars = set(string.ascii_uppercase) +_hybrid36_lower_chars = set(string.ascii_lowercase) +_hybrid36_digits = set(string.digits) +_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits +_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits + +def decode(input_string): + """ + Convert an input string of a number in hybrid-36 format to an integer. + + """ + value_error_message = "invalid literal for hybrid-36 conversion: '%s'" + + original_input_string = input_string + input_string = input_string.strip() + + # Manually handle negative sign. + if input_string.startswith("-"): + sign = -1 + input_string = input_string[1:] + else: + sign = 1 + + if not len(input_string): + raise ValueError(value_error_message % input_string) + + # See http://cci.lbl.gov/hybrid_36/ for documentation on the format. + + num_chars = len(input_string) + first_char = input_string[0] + + if first_char in _hybrid36_digits: + return sign * int(input_string) + elif first_char in _hybrid36_upper_chars: + reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars) + _hybrid36_set = _hybrid36_upper_set + elif first_char in _hybrid36_lower_chars: + reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars) + _hybrid36_set = _hybrid36_lower_set + else: + raise ValueError(value_error_message % original_input_string) + + # Check the validity of the input string: ASCII characters should be + # either all uppercase or all lowercase. + for c in input_string[1:]: + if c not in _hybrid36_set: + raise ValueError(value_error_message % original_input_string) + + # Convert with the int function. + return sign * (int(input_string, 36) + reference) From 73c7a2a4be2bbc52e8baea101f1186b0bf6f8e59 Mon Sep 17 00:00:00 2001 From: Matvey Adzhigirey Date: Tue, 4 Dec 2012 17:21:24 -0800 Subject: [PATCH 4/6] optimize a number of functions - Major optimization of the put_atom_in_box() function. - Major optimization of the protein_precheck() function. - Minor optimization in radial_volume_desolvation() - Optimization in top_up() to scale for large structures. --- Tests/pkacalc_test.py | 2 +- propka/bonds.py | 20 +++++++++----------- propka/calculations.py | 5 ++--- propka/conformation_container.py | 11 ++--------- propka/pdb.py | 14 ++++++++++---- 5 files changed, 24 insertions(+), 28 deletions(-) diff --git a/Tests/pkacalc_test.py b/Tests/pkacalc_test.py index 04f896e..c82ce4f 100644 --- a/Tests/pkacalc_test.py +++ b/Tests/pkacalc_test.py @@ -7,7 +7,7 @@ import unittest # Setting this up as a direct translation of the original runtest.py script # that will be run as part of 'python setup.py test'. This takes on the -# order of 30s to run. +# order of 10s to run. class SystemTest(unittest.TestCase): """ diff --git a/propka/bonds.py b/propka/bonds.py index e199858..f475338 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -370,7 +370,7 @@ class bondmaker: for x in range(self.no_box_x): for y in range(self.no_box_y): for z in range(self.no_box_z): - self.boxes[self.box_key(x,y,z)] = [] + self.boxes[(x,y,z)] = [] # put atoms into boxes for atom in atoms: @@ -380,10 +380,8 @@ class bondmaker: self.put_atom_in_box(x,y,z,atom) # assign bonds - keys = self.boxes.keys() - for key in keys: - self.find_bonds_for_atoms(self.boxes[key]) - + for key, value in self.boxes.items(): + self.find_bonds_for_atoms(value) return @@ -391,21 +389,21 @@ class bondmaker: def put_atom_in_box(self,x,y,z,atom): # atom in the x,y,z box and the up to 7 neighboring boxes on # one side of the x,y,z box in each dimension + for bx in [x,x+1]: for by in [y,y+1]: for bz in [z,z+1]: - key = self.box_key(bx,by,bz) - if key in self.boxes.keys(): + key = (bx,by,bz) + try: self.boxes[key].append(atom) + except KeyError: + # No box exists for this coordinate + pass #print(atom,'->',key,':',len(self.boxes[key])) return - def box_key(self, x, y, z): - return '%d-%d-%d'%(x,y,z) - - def has_bond(self, atom1, atom2): if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms: return True diff --git a/propka/calculations.py b/propka/calculations.py index 5f71f8e..88224ef 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -364,12 +364,11 @@ def radial_volume_desolvation(parameters, group): # desolvation if sq_dist < parameters.desolv_cutoff_squared: # use a default relative volume of 1.0 if the volume of the element is not found in parameters - dv = 1.0 - if atom.element in parameters.VanDerWaalsVolume.keys(): - dv = parameters.VanDerWaalsVolume[atom.element] # insert check for methyl groups if atom.element == 'C' and atom.name not in ['CA','C']: dv = parameters.VanDerWaalsVolume['C4'] + else: + dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0) dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist) # dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared) diff --git a/propka/conformation_container.py b/propka/conformation_container.py index 76439d4..ab4f821 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -390,19 +390,12 @@ class Conformation_container: def top_up(self, other): """ Tops up self with all atoms found in other but not in self """ + my_residue_labels = { a.residue_label for a in self.atoms } for atom in other.atoms: - if not self.have_atom(atom): + if not atom.residue_label in my_residue_labels: self.copy_atom(atom) return - def have_atom(self, atom): - res = False - for a in self.atoms: - if a.residue_label == atom.residue_label: - res = True - break - return res - def find_group(self, group): for g in self.groups: if g.atom.residue_label == group.atom.residue_label: diff --git a/propka/pdb.py b/propka/pdb.py index 9443b35..ebed33c 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -50,11 +50,17 @@ def protein_precheck(conformations, names): for name in names: atoms = conformations[name].atoms - res_ids = [] - [res_ids.append(resid_from_atom(a)) for a in atoms if not res_ids.count(resid_from_atom(a))] + # Group the atoms by their residue: + atoms_by_residue = {} + for a in atoms: + if a.element != 'H': + res_id = resid_from_atom(a) + try: + atoms_by_residue[res_id].append(a) + except KeyError: + atoms_by_residue[res_id] = [a] - for res_id in res_ids: - res_atoms = [a for a in atoms if resid_from_atom(a) == res_id and a.element != 'H'] + for res_id, res_atoms in atoms_by_residue.items(): resname = res_atoms[0].resName residue_label = '%3s%5s'%(resname, res_id) From 4566cd67f9a6a641ee87985cc768be9ef631399b Mon Sep 17 00:00:00 2001 From: Matvey Adzhigirey Date: Tue, 4 Dec 2012 20:10:09 -0800 Subject: [PATCH 5/6] Fix a number of crashes and special conditions - Fix an issue where non-existing interactions were analyzed. Bug fix in setIonDeterminants(): skip interactions if either titratable_group_interaction_atoms or the backbone_interaction_atoms is empty. - Set center of COO_group to self.atom if no oxygens present in residue. COO_group: If self.atom.get_bonded_elements('O') returns an emtpy list (the residue has missing side-chain), simply set the center of the group to the atom, instead of the center of the (non-existing) oxygens. - Raise more friendly exception in Group.set_center() if an empty atom set is passed in. - Cterm_group: Fix crash if residue is missing a carbon. The previous code would crash if the C-termini residue group was missing the carbon atom. - Fix a failure when a HIS residue has missing side-chain atoms. - Added an entry for Iodine (I) to the valence electrons table. --- propka/determinants.py | 7 ++++++- propka/group.py | 39 +++++++++++++++++++++++++++++---------- propka/protonate.py | 4 +++- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/propka/determinants.py b/propka/determinants.py index b3ed586..4299432 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -168,11 +168,16 @@ def setIonDeterminants(conformation_container, version): def setBackBoneDeterminants(titratable_groups, backbone_groups, version): for titratable_group in titratable_groups: + titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids + if not titratable_group_interaction_atoms: + continue + # find out which backbone groups this titratable is interacting with for backbone_group in backbone_groups: # find the interacting atoms backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group) - titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids + if not backbone_interaction_atoms: + continue # find the smallest distance [backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms, diff --git a/propka/group.py b/propka/group.py index ac52d3e..b5c5a06 100644 --- a/propka/group.py +++ b/propka/group.py @@ -420,6 +420,9 @@ class Group: return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0 def set_center(self, atoms): + if not atoms: + raise ValueError("At least one atom must be specified") + # reset center self.x = 0.0; self.y = 0.0; self.z = 0.0 @@ -614,8 +617,14 @@ class COO_group(Group): # Identify the two caroxyl oxygen atoms the_oxygens = self.atom.get_bonded_elements('O') - # set the center using the two oxygen carboxyl atoms - self.set_center(the_oxygens) + # set the center using the two oxygen carboxyl atoms (if present) + if the_oxygens: + self.set_center(the_oxygens) + else: + self.set_center([self.atom]) + # FIXME perhaps it would be better to ignore this group completely + # if the oxygen is missing from this residue? + self.set_interaction_atoms(the_oxygens, the_oxygens) return @@ -636,7 +645,12 @@ class HIS_group(Group): my_protonator.protonate_atom(r) # set the center using the ring atoms - self.set_center(ring_atoms) + if ring_atoms: + self.set_center(ring_atoms) + else: + # Missing side-chain atoms + self.set_center([self.atom]) + # FIXME perhaps it would be better to ignore this group completely? # find the hydrogens on the ring-nitrogens hydrogens = [] @@ -751,14 +765,19 @@ class Cterm_group(Group): def setup_atoms(self): # Identify the carbon and other oxygen carboxyl atoms - the_carbon = self.atom.get_bonded_elements('C') - the_other_oxygen = the_carbon[0].get_bonded_elements('O') - the_other_oxygen.remove(self.atom) + the_carbons = self.atom.get_bonded_elements('C') + if not the_carbons: + self.set_center([self.atom]) + # FIXME perhaps it would be better to ignore this group completely + # if the carbon is missing from this residue? + else: + the_other_oxygen = the_carbons[0].get_bonded_elements('O') + the_other_oxygen.remove(self.atom) - # set the center and interaction atoms - the_oxygens = [self.atom]+ the_other_oxygen - self.set_center(the_oxygens) - self.set_interaction_atoms(the_oxygens, the_oxygens) + # set the center and interaction atoms + the_oxygens = [self.atom]+ the_other_oxygen + self.set_center(the_oxygens) + self.set_interaction_atoms(the_oxygens, the_oxygens) return diff --git a/propka/protonate.py b/propka/protonate.py index 396790b..34b8a01 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -47,7 +47,9 @@ class Protonate: 'As':5, 'Se':6, 'Br':7, - 'Kr':8} + 'Kr':8, + 'I':7, + } From ae462842c103d7359ece91be26eada92512a4039 Mon Sep 17 00:00:00 2001 From: Matvey Adzhigirey Date: Tue, 11 Dec 2012 16:20:35 -0800 Subject: [PATCH 6/6] replace empty chains with "_" instead "A" --- propka/atom.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/propka/atom.py b/propka/atom.py index 31a83dc..4b71b2a 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -75,7 +75,10 @@ class Atom: self.z = float( line[46:54].strip() ) self.resNumb = int( line[22:26].strip() ) self.resName = "%-3s" % (line[17:20].strip()) - self.chainID = max(line[21], 'A') # set chain id to A in case of white space + self.chainID = line[21] + # Set chain id to "_" if it is just white space. + if not self.chainID.strip(): + self.chainID = '_' self.type = line[:6].strip().lower() if self.resName in ['DA ','DC ','DG ','DT ']: