diff --git a/Tests/__init__.py b/Tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Tests/hybrid36.py b/Tests/hybrid36.py new file mode 100644 index 0000000..4429e07 --- /dev/null +++ b/Tests/hybrid36.py @@ -0,0 +1,57 @@ +import unittest + +import propka.hybrid36 as hybrid36 + +class Hybrid36Test(unittest.TestCase): + def testDecode(self): + test_values = { + "99999": 99999, + "A0000": 100000, + "0": 0, + "9": 9, + "A": 10, + " ZZZZY": 43770014, + "ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000 + "a0000": 43770016, + "zzzzz": 87440031, + "zzzzy": 87440030, + "99": 99, + "A0": 100, + "ZZ": 1035, + "zz": 1971, + "-99999": -99999, + "-A0000": -100000, + "-0": 0, + "-9": -9, + "-A": -10, + "-ZZZZY": -43770014, + "-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000 + "-a0000": -43770016, + "-zzzzz": -87440031, + "-zzzzy": -87440030, + "-99": -99, + "-A0": -100, + "-ZZ": -1035, + "-zz": -1971, + "PROPKA": 954495146, + "A001Z": 100071, + "B0000": 1779616, + } + + for k, v in test_values.iteritems(): + self.assertEqual(hybrid36.decode(k), v) + + def testErrors(self): + test_values = [ + "99X99", + "X9-99", + "XYZa", + "", + "-", + "!NotOk", + ] + + for v in test_values: + with self.assertRaises(ValueError) as e: + hybrid36.decode(v) + self.assertTrue(v in str(e.exception)) diff --git a/Tests/pkacalc_test.py b/Tests/pkacalc_test.py new file mode 100644 index 0000000..c82ce4f --- /dev/null +++ b/Tests/pkacalc_test.py @@ -0,0 +1,68 @@ + +import os +import re +from subprocess import call +import sys +import unittest + +# Setting this up as a direct translation of the original runtest.py script +# that will be run as part of 'python setup.py test'. This takes on the +# order of 10s to run. + +class SystemTest(unittest.TestCase): + """ + Run the program and compare against reference results. + """ + def test_pka_calc(self): + pdbs = ['1FTJ-Chain-A', + '1HPX', + '4DFR'] + + test_dir = os.path.dirname(__file__) + base_dir = os.path.dirname(test_dir) + + executable = os.path.join(base_dir, "scripts", "propka31.py") + + env = { "PYTHONPATH" : base_dir } + + for pdb in pdbs: + input_filename = os.path.join(test_dir, "pdb", pdb + ".pdb") + output_filename = os.path.join(test_dir, pdb + ".out") + + output_file = open(output_filename, "w") + call([sys.executable, executable, input_filename], + stdout=output_file, env=env) + output_file.close() + + # Check pka predictions. + ref = open(os.path.join(test_dir, "results", pdb + ".dat")) + output = open(output_filename) + + atpka = False + errors = [] + for line in output: + if not atpka: + # Start testing pka values. + if "model-pKa" in line: + atpka = True + continue + + m = re.search('([0-9]+\.[0-9]+)', line) + if not m: + break + + expected_value = float(ref.readline()) + value = float(m.group(0)) + + if(value != expected_value): + identity = line[:m.start()].strip() + errors.append("%12s %8.2f %8.2f" % + (identity, expected_value, value)) + + ref.close() + output.close() + + if errors: + error_header = " Group Expected Calculated\n" + self.Fail("Unexpected pKa values:\n" + error_header + + "\n".join(errors)) diff --git a/Tests/runtest.py b/Tests/runtest.py index e73517a..3ea030c 100755 --- a/Tests/runtest.py +++ b/Tests/runtest.py @@ -9,43 +9,39 @@ from subprocess import call import os, re import sys -pdbs = ['1FTJ-Chain-A', - '1HPX', - '4DFR'] - -for pdb in pdbs: - print('') - print('RUNNING '+pdb) - - # Run pka calculation - call([sys.executable, '../scripts/propka31.py','pdb/'+pdb+'.pdb'], stdout = open(pdb+'.out', 'w+')) - - # Test pka predictiona - result = open('results/'+pdb+'.dat','r') - atpka = False - for line in open(pdb+'.pka', 'r').readlines(): - if not atpka: - if "model-pKa" in line: - # test pka - atpka = True - continue - else: - continue - if "-" in line: - # done testing - atpka = False - continue - - r = float(result.readline()) - m = re.search('([0-9]+\.[0-9]+)', line) - - if(float(m.group(0)) != r): - print(" ERR:") - print(line) - print(" "+"should be: "+str(r)) +if __name__ == "__main__": + pdbs = ['1FTJ-Chain-A', + '1HPX', + '4DFR'] + for pdb in pdbs: + print('') + print('RUNNING '+pdb) + # Run pka calculation + call([sys.executable, '../scripts/propka31.py','pdb/'+pdb+'.pdb'], stdout = open(pdb+'.out', 'w+')) + # Test pka predictiona + result = open('results/'+pdb+'.dat','r') + atpka = False + for line in open(pdb+'.pka', 'r').readlines(): + if not atpka: + if "model-pKa" in line: + # test pka + atpka = True + continue + else: + continue + if "-" in line: + # done testing + atpka = False + continue + r = float(result.readline()) + m = re.search('([0-9]+\.[0-9]+)', line) + if(float(m.group(0)) != r): + print(" ERR:") + print(line) + print(" "+"should be: "+str(r)) diff --git a/propka/atom.py b/propka/atom.py index b9abec4..4b71b2a 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -4,6 +4,7 @@ from __future__ import print_function import string, propka.lib, propka.group +from . import hybrid36 class Atom: """ @@ -68,13 +69,16 @@ class Atom: if line: self.name = line[12:16].strip() - self.numb = int( line[ 6:11].strip() ) + self.numb = int( hybrid36.decode(line[ 6:11]) ) self.x = float( line[30:38].strip() ) self.y = float( line[38:46].strip() ) self.z = float( line[46:54].strip() ) self.resNumb = int( line[22:26].strip() ) self.resName = "%-3s" % (line[17:20].strip()) - self.chainID = max(line[21], 'A') # set chain id to A in case of white space + self.chainID = line[21] + # Set chain id to "_" if it is just white space. + if not self.chainID.strip(): + self.chainID = '_' self.type = line[:6].strip().lower() if self.resName in ['DA ','DC ','DG ','DT ']: diff --git a/propka/bonds.py b/propka/bonds.py index c0f7e71..f475338 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -354,8 +354,9 @@ class bondmaker: #print('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen)) # how many boxes do we need in each dimension? - # NOTE: math.ceil() returns an int in python3 and a float in python2, - # so we need to cast it to int for range() to work. + # NOTE: math.ceil() returns an int in python3 and a float in + # python2, so we need to convert it to an int for range() to work in + # both versions. See PEP 3141. self.no_box_x = max(1, int(math.ceil(xlen/box_size))) self.no_box_y = max(1, int(math.ceil(ylen/box_size))) self.no_box_z = max(1, int(math.ceil(zlen/box_size))) @@ -369,7 +370,7 @@ class bondmaker: for x in range(self.no_box_x): for y in range(self.no_box_y): for z in range(self.no_box_z): - self.boxes[self.box_key(x,y,z)] = [] + self.boxes[(x,y,z)] = [] # put atoms into boxes for atom in atoms: @@ -379,10 +380,8 @@ class bondmaker: self.put_atom_in_box(x,y,z,atom) # assign bonds - keys = self.boxes.keys() - for key in keys: - self.find_bonds_for_atoms(self.boxes[key]) - + for key, value in self.boxes.items(): + self.find_bonds_for_atoms(value) return @@ -390,21 +389,21 @@ class bondmaker: def put_atom_in_box(self,x,y,z,atom): # atom in the x,y,z box and the up to 7 neighboring boxes on # one side of the x,y,z box in each dimension + for bx in [x,x+1]: for by in [y,y+1]: for bz in [z,z+1]: - key = self.box_key(bx,by,bz) - if key in self.boxes.keys(): + key = (bx,by,bz) + try: self.boxes[key].append(atom) + except KeyError: + # No box exists for this coordinate + pass #print(atom,'->',key,':',len(self.boxes[key])) return - def box_key(self, x, y, z): - return '%d-%d-%d'%(x,y,z) - - def has_bond(self, atom1, atom2): if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms: return True diff --git a/propka/calculations.py b/propka/calculations.py index 5f71f8e..88224ef 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -364,12 +364,11 @@ def radial_volume_desolvation(parameters, group): # desolvation if sq_dist < parameters.desolv_cutoff_squared: # use a default relative volume of 1.0 if the volume of the element is not found in parameters - dv = 1.0 - if atom.element in parameters.VanDerWaalsVolume.keys(): - dv = parameters.VanDerWaalsVolume[atom.element] # insert check for methyl groups if atom.element == 'C' and atom.name not in ['CA','C']: dv = parameters.VanDerWaalsVolume['C4'] + else: + dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0) dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist) # dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared) diff --git a/propka/conformation_container.py b/propka/conformation_container.py index 76439d4..ab4f821 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -390,19 +390,12 @@ class Conformation_container: def top_up(self, other): """ Tops up self with all atoms found in other but not in self """ + my_residue_labels = { a.residue_label for a in self.atoms } for atom in other.atoms: - if not self.have_atom(atom): + if not atom.residue_label in my_residue_labels: self.copy_atom(atom) return - def have_atom(self, atom): - res = False - for a in self.atoms: - if a.residue_label == atom.residue_label: - res = True - break - return res - def find_group(self, group): for g in self.groups: if g.atom.residue_label == group.atom.residue_label: diff --git a/propka/determinants.py b/propka/determinants.py index b3ed586..4299432 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -168,11 +168,16 @@ def setIonDeterminants(conformation_container, version): def setBackBoneDeterminants(titratable_groups, backbone_groups, version): for titratable_group in titratable_groups: + titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids + if not titratable_group_interaction_atoms: + continue + # find out which backbone groups this titratable is interacting with for backbone_group in backbone_groups: # find the interacting atoms backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group) - titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids + if not backbone_interaction_atoms: + continue # find the smallest distance [backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms, diff --git a/propka/group.py b/propka/group.py index ac52d3e..b5c5a06 100644 --- a/propka/group.py +++ b/propka/group.py @@ -420,6 +420,9 @@ class Group: return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0 def set_center(self, atoms): + if not atoms: + raise ValueError("At least one atom must be specified") + # reset center self.x = 0.0; self.y = 0.0; self.z = 0.0 @@ -614,8 +617,14 @@ class COO_group(Group): # Identify the two caroxyl oxygen atoms the_oxygens = self.atom.get_bonded_elements('O') - # set the center using the two oxygen carboxyl atoms - self.set_center(the_oxygens) + # set the center using the two oxygen carboxyl atoms (if present) + if the_oxygens: + self.set_center(the_oxygens) + else: + self.set_center([self.atom]) + # FIXME perhaps it would be better to ignore this group completely + # if the oxygen is missing from this residue? + self.set_interaction_atoms(the_oxygens, the_oxygens) return @@ -636,7 +645,12 @@ class HIS_group(Group): my_protonator.protonate_atom(r) # set the center using the ring atoms - self.set_center(ring_atoms) + if ring_atoms: + self.set_center(ring_atoms) + else: + # Missing side-chain atoms + self.set_center([self.atom]) + # FIXME perhaps it would be better to ignore this group completely? # find the hydrogens on the ring-nitrogens hydrogens = [] @@ -751,14 +765,19 @@ class Cterm_group(Group): def setup_atoms(self): # Identify the carbon and other oxygen carboxyl atoms - the_carbon = self.atom.get_bonded_elements('C') - the_other_oxygen = the_carbon[0].get_bonded_elements('O') - the_other_oxygen.remove(self.atom) + the_carbons = self.atom.get_bonded_elements('C') + if not the_carbons: + self.set_center([self.atom]) + # FIXME perhaps it would be better to ignore this group completely + # if the carbon is missing from this residue? + else: + the_other_oxygen = the_carbons[0].get_bonded_elements('O') + the_other_oxygen.remove(self.atom) - # set the center and interaction atoms - the_oxygens = [self.atom]+ the_other_oxygen - self.set_center(the_oxygens) - self.set_interaction_atoms(the_oxygens, the_oxygens) + # set the center and interaction atoms + the_oxygens = [self.atom]+ the_other_oxygen + self.set_center(the_oxygens) + self.set_interaction_atoms(the_oxygens, the_oxygens) return diff --git a/propka/hybrid36.py b/propka/hybrid36.py new file mode 100644 index 0000000..f5b96af --- /dev/null +++ b/propka/hybrid36.py @@ -0,0 +1,52 @@ +import string + +_hybrid36_upper_chars = set(string.ascii_uppercase) +_hybrid36_lower_chars = set(string.ascii_lowercase) +_hybrid36_digits = set(string.digits) +_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits +_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits + +def decode(input_string): + """ + Convert an input string of a number in hybrid-36 format to an integer. + + """ + value_error_message = "invalid literal for hybrid-36 conversion: '%s'" + + original_input_string = input_string + input_string = input_string.strip() + + # Manually handle negative sign. + if input_string.startswith("-"): + sign = -1 + input_string = input_string[1:] + else: + sign = 1 + + if not len(input_string): + raise ValueError(value_error_message % input_string) + + # See http://cci.lbl.gov/hybrid_36/ for documentation on the format. + + num_chars = len(input_string) + first_char = input_string[0] + + if first_char in _hybrid36_digits: + return sign * int(input_string) + elif first_char in _hybrid36_upper_chars: + reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars) + _hybrid36_set = _hybrid36_upper_set + elif first_char in _hybrid36_lower_chars: + reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars) + _hybrid36_set = _hybrid36_lower_set + else: + raise ValueError(value_error_message % original_input_string) + + # Check the validity of the input string: ASCII characters should be + # either all uppercase or all lowercase. + for c in input_string[1:]: + if c not in _hybrid36_set: + raise ValueError(value_error_message % original_input_string) + + # Convert with the int function. + return sign * (int(input_string, 36) + reference) diff --git a/propka/pdb.py b/propka/pdb.py index 9443b35..ebed33c 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -50,11 +50,17 @@ def protein_precheck(conformations, names): for name in names: atoms = conformations[name].atoms - res_ids = [] - [res_ids.append(resid_from_atom(a)) for a in atoms if not res_ids.count(resid_from_atom(a))] + # Group the atoms by their residue: + atoms_by_residue = {} + for a in atoms: + if a.element != 'H': + res_id = resid_from_atom(a) + try: + atoms_by_residue[res_id].append(a) + except KeyError: + atoms_by_residue[res_id] = [a] - for res_id in res_ids: - res_atoms = [a for a in atoms if resid_from_atom(a) == res_id and a.element != 'H'] + for res_id, res_atoms in atoms_by_residue.items(): resname = res_atoms[0].resName residue_label = '%3s%5s'%(resname, res_id) diff --git a/propka/protonate.py b/propka/protonate.py index 396790b..34b8a01 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -47,7 +47,9 @@ class Protonate: 'As':5, 'Se':6, 'Br':7, - 'Kr':8} + 'Kr':8, + 'I':7, + } diff --git a/setup.py b/setup.py index 1bee359..94e7ea4 100644 --- a/setup.py +++ b/setup.py @@ -48,4 +48,5 @@ using the tutorial http://propka.ki.ku.dk/~luca/wiki/index.php/PROPKA_3.1_Tutori ], }, zip_safe=True, + test_suite="Tests", )