Merge pull request #3 from schrodinger/master

merge from schrodinger
2014-04-04 17:08:52 +02:00
parent d01659d614 ae462842c1
commit 54ebb22fb4
14 changed files with 278 additions and 77 deletions
--- a/Tests/init.py
+++ b/Tests/init.py
--- a/Tests/hybrid36.py
+++ b/Tests/hybrid36.py
@@ -0,0 +1,57 @@
+import unittest
+
+import propka.hybrid36 as hybrid36
+
+class Hybrid36Test(unittest.TestCase):
+    def testDecode(self):
+        test_values = {
+            "99999": 99999,
+            "A0000": 100000,
+            "0": 0,
+            "9": 9,
+            "A": 10,
+            "  ZZZZY": 43770014,
+            "ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000
+            "a0000": 43770016,
+            "zzzzz": 87440031,
+            "zzzzy": 87440030,
+            "99": 99,
+            "A0": 100,
+            "ZZ": 1035,
+            "zz": 1971,
+            "-99999": -99999,
+            "-A0000": -100000,
+            "-0": 0,
+            "-9": -9,
+            "-A": -10,
+            "-ZZZZY": -43770014,
+            "-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000
+            "-a0000": -43770016,
+            "-zzzzz": -87440031,
+            "-zzzzy": -87440030,
+            "-99": -99,
+            "-A0": -100,
+            "-ZZ": -1035,
+            "-zz": -1971,
+            "PROPKA": 954495146,
+            "A001Z": 100071,
+            "B0000": 1779616,
+        }
+
+        for k, v in test_values.iteritems():
+            self.assertEqual(hybrid36.decode(k), v)
+
+    def testErrors(self):
+        test_values = [
+            "99X99",
+            "X9-99",
+            "XYZa",
+            "",
+            "-",
+            "!NotOk",
+        ]
+
+        for v in test_values:
+            with self.assertRaises(ValueError) as e:
+                hybrid36.decode(v)
+            self.assertTrue(v in str(e.exception))
--- a/Tests/pkacalc_test.py
+++ b/Tests/pkacalc_test.py
@@ -0,0 +1,68 @@
+
+import os
+import re
+from subprocess import call
+import sys
+import unittest
+
+# Setting this up as a direct translation of the original runtest.py script
+# that will be run as part of 'python setup.py test'. This takes on the
+# order of 10s to run.
+
+class SystemTest(unittest.TestCase):
+    """
+    Run the program and compare against reference results.
+    """
+    def test_pka_calc(self):
+        pdbs = ['1FTJ-Chain-A',
+                '1HPX',
+                '4DFR']
+
+        test_dir = os.path.dirname(__file__)
+        base_dir = os.path.dirname(test_dir)
+
+        executable = os.path.join(base_dir, "scripts", "propka31.py")
+
+        env = { "PYTHONPATH" : base_dir }
+
+        for pdb in pdbs:
+            input_filename = os.path.join(test_dir, "pdb", pdb + ".pdb")
+            output_filename = os.path.join(test_dir, pdb + ".out")
+
+            output_file = open(output_filename, "w")
+            call([sys.executable, executable, input_filename],
+                    stdout=output_file, env=env)
+            output_file.close()
+
+            # Check pka predictions.
+            ref = open(os.path.join(test_dir, "results", pdb + ".dat"))
+            output = open(output_filename)
+
+            atpka = False
+            errors = []
+            for line in output:
+                if not atpka:
+                    # Start testing pka values.
+                    if "model-pKa" in line:
+                        atpka = True
+                    continue
+
+                m = re.search('([0-9]+\.[0-9]+)', line)
+                if not m:
+                    break
+
+                expected_value = float(ref.readline())
+                value = float(m.group(0))
+
+                if(value != expected_value):
+                    identity = line[:m.start()].strip()
+                    errors.append("%12s  %8.2f    %8.2f" %
+                            (identity, expected_value, value))
+
+            ref.close()
+            output.close()
+
+            if errors:
+                error_header = "       Group  Expected  Calculated\n"
+                self.Fail("Unexpected pKa values:\n" + error_header +
+                          "\n".join(errors))
--- a/Tests/runtest.py
+++ b/Tests/runtest.py
@@ -9,6 +9,7 @@ from subprocess import call
 import os, re
 import sys

+if __name__ == "__main__":
    pdbs = ['1FTJ-Chain-A',
            '1HPX',
            '4DFR']
@@ -44,8 +45,3 @@ for pdb in pdbs:
                print(line)
                print(" "+"should be: "+str(r))

-
-
-
-
-
--- a/propka/atom.py
+++ b/propka/atom.py
@@ -4,6 +4,7 @@ from __future__ import print_function

 import string, propka.lib, propka.group

+from . import hybrid36

 class Atom:
    """
@@ -68,13 +69,16 @@ class Atom:

        if line:
            self.name = line[12:16].strip()
-            self.numb = int( line[ 6:11].strip() )
+            self.numb = int( hybrid36.decode(line[ 6:11]) )
            self.x = float( line[30:38].strip() )
            self.y = float( line[38:46].strip() )
            self.z = float( line[46:54].strip() )
            self.resNumb = int( line[22:26].strip() )
            self.resName = "%-3s" % (line[17:20].strip())
-            self.chainID = max(line[21], 'A') # set chain id to A in case of white space
+            self.chainID = line[21]
+            # Set chain id to "_" if it is just white space.
+            if not self.chainID.strip():
+                self.chainID = '_'
            self.type = line[:6].strip().lower()

            if self.resName in ['DA ','DC ','DG ','DT ']:
--- a/propka/bonds.py
+++ b/propka/bonds.py
@@ -354,8 +354,9 @@ class bondmaker:
        #print('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen))

        # how many boxes do we need in each dimension?
-        # NOTE: math.ceil() returns an int in python3 and a float in python2,
-        # so we need to cast it to int for range() to work.
+        # NOTE: math.ceil() returns an int in python3 and a float in
+        # python2, so we need to convert it to an int for range() to work in
+        # both versions. See PEP 3141.
        self.no_box_x = max(1, int(math.ceil(xlen/box_size)))
        self.no_box_y = max(1, int(math.ceil(ylen/box_size)))
        self.no_box_z = max(1, int(math.ceil(zlen/box_size)))
@@ -369,7 +370,7 @@ class bondmaker:
        for x in range(self.no_box_x):
            for y in range(self.no_box_y):
                for z in range(self.no_box_z):
-                    self.boxes[self.box_key(x,y,z)] = []
+                    self.boxes[(x,y,z)] = []

        # put atoms into boxes
        for atom in atoms:
@@ -379,10 +380,8 @@ class bondmaker:
            self.put_atom_in_box(x,y,z,atom)

        # assign bonds
-        keys = self.boxes.keys()
-        for key in keys:
-            self.find_bonds_for_atoms(self.boxes[key])
-
+        for key, value in self.boxes.items():
+            self.find_bonds_for_atoms(value)


        return
@@ -390,21 +389,21 @@ class bondmaker:
    def put_atom_in_box(self,x,y,z,atom):
        # atom in the x,y,z box and the up to 7 neighboring boxes on
        # one side of the x,y,z box in each dimension
+
        for bx in [x,x+1]:
            for by in [y,y+1]:
                for bz in [z,z+1]:
-                    key = self.box_key(bx,by,bz)
-                    if key in self.boxes.keys():
+                    key = (bx,by,bz)
+                    try:
                        self.boxes[key].append(atom)
+                    except KeyError:
+                        # No box exists for this coordinate
+                        pass

                        #print(atom,'->',key,':',len(self.boxes[key]))

        return

-    def box_key(self, x, y, z):
-        return '%d-%d-%d'%(x,y,z)
-
-
    def has_bond(self, atom1, atom2):
        if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms:
            return True
--- a/propka/calculations.py
+++ b/propka/calculations.py
@@ -364,12 +364,11 @@ def radial_volume_desolvation(parameters, group):
        # desolvation
        if sq_dist < parameters.desolv_cutoff_squared:
            # use a default relative volume of 1.0 if the volume of the element is not found in parameters
-            dv = 1.0
-            if atom.element in parameters.VanDerWaalsVolume.keys():
-                dv = parameters.VanDerWaalsVolume[atom.element]
            # insert check for methyl groups
            if atom.element == 'C' and atom.name not in ['CA','C']:
                dv = parameters.VanDerWaalsVolume['C4']
+            else:
+                dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0)

            dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist)
 #            dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared)
--- a/propka/conformation_container.py
+++ b/propka/conformation_container.py
@@ -390,19 +390,12 @@ class Conformation_container:

    def top_up(self, other):
        """ Tops up self with all atoms found in other but not in self """
+        my_residue_labels = { a.residue_label for a in self.atoms }
        for atom in other.atoms:
-            if not self.have_atom(atom):
+            if not atom.residue_label in my_residue_labels:
                self.copy_atom(atom)
        return

-    def have_atom(self, atom):
-        res = False
-        for a in self.atoms:
-            if a.residue_label == atom.residue_label:
-                res = True
-                break
-        return res
-
    def find_group(self, group):
        for g in self.groups:
            if g.atom.residue_label == group.atom.residue_label:
--- a/propka/determinants.py
+++ b/propka/determinants.py
@@ -168,11 +168,16 @@ def setIonDeterminants(conformation_container, version):
 def setBackBoneDeterminants(titratable_groups, backbone_groups, version):

    for titratable_group in titratable_groups:
+        titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids
+        if not titratable_group_interaction_atoms:
+            continue
+
        # find out which backbone groups this titratable is interacting with
        for backbone_group in backbone_groups:
            # find the interacting atoms
            backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group)
-            titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids
+            if not backbone_interaction_atoms:
+                continue

            # find the smallest distance
            [backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms,
--- a/propka/group.py
+++ b/propka/group.py
@@ -420,6 +420,9 @@ class Group:
            return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0

    def set_center(self, atoms):
+        if not atoms:
+            raise ValueError("At least one atom must be specified")
+
        # reset center
        self.x = 0.0; self.y = 0.0; self.z = 0.0

@@ -614,8 +617,14 @@ class COO_group(Group):
        # Identify the two caroxyl oxygen atoms
        the_oxygens = self.atom.get_bonded_elements('O')

-        # set the center using the two oxygen carboxyl atoms
+        # set the center using the two oxygen carboxyl atoms (if present)
+        if the_oxygens:
            self.set_center(the_oxygens)
+        else:
+            self.set_center([self.atom])
+            # FIXME perhaps it would be better to ignore this group completely
+            # if the oxygen is missing from this residue?
+
        self.set_interaction_atoms(the_oxygens, the_oxygens)
        return

@@ -636,7 +645,12 @@ class HIS_group(Group):
            my_protonator.protonate_atom(r)

        # set the center using the ring atoms
+        if ring_atoms:
            self.set_center(ring_atoms)
+        else:
+            # Missing side-chain atoms
+            self.set_center([self.atom])
+            # FIXME perhaps it would be better to ignore this group completely?

        # find the hydrogens on the ring-nitrogens
        hydrogens = []
@@ -751,8 +765,13 @@ class Cterm_group(Group):

    def setup_atoms(self):
        # Identify the carbon and other oxygen carboxyl atoms
-        the_carbon = self.atom.get_bonded_elements('C')
-        the_other_oxygen = the_carbon[0].get_bonded_elements('O')
+        the_carbons = self.atom.get_bonded_elements('C')
+        if not the_carbons:
+            self.set_center([self.atom])
+            # FIXME perhaps it would be better to ignore this group completely
+            # if the carbon is missing from this residue?
+        else:
+            the_other_oxygen = the_carbons[0].get_bonded_elements('O')
            the_other_oxygen.remove(self.atom)

            # set the center and interaction atoms
--- a/propka/hybrid36.py
+++ b/propka/hybrid36.py
@@ -0,0 +1,52 @@
+import string
+
+_hybrid36_upper_chars = set(string.ascii_uppercase)
+_hybrid36_lower_chars = set(string.ascii_lowercase)
+_hybrid36_digits = set(string.digits)
+_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
+_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits
+
+def decode(input_string):
+    """
+    Convert an input string of a number in hybrid-36 format to an integer.
+
+    """
+    value_error_message = "invalid literal for hybrid-36 conversion: '%s'"
+
+    original_input_string = input_string
+    input_string = input_string.strip()
+
+    # Manually handle negative sign.
+    if input_string.startswith("-"):
+        sign = -1
+        input_string = input_string[1:]
+    else:
+        sign = 1
+
+    if not len(input_string):
+        raise ValueError(value_error_message % input_string)
+
+    # See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
+
+    num_chars = len(input_string)
+    first_char = input_string[0]
+
+    if first_char in _hybrid36_digits:
+        return sign * int(input_string)
+    elif first_char in _hybrid36_upper_chars:
+        reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
+        _hybrid36_set = _hybrid36_upper_set
+    elif first_char in _hybrid36_lower_chars:
+        reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
+        _hybrid36_set = _hybrid36_lower_set
+    else:
+        raise ValueError(value_error_message % original_input_string)
+
+    # Check the validity of the input string: ASCII characters should be
+    # either all uppercase or all lowercase.
+    for c in input_string[1:]:
+        if c not in _hybrid36_set:
+            raise ValueError(value_error_message % original_input_string)
+
+    # Convert with the int function.
+    return sign * (int(input_string, 36) + reference)
--- a/propka/pdb.py
+++ b/propka/pdb.py
@@ -50,11 +50,17 @@ def protein_precheck(conformations, names):
    for name in names:
        atoms = conformations[name].atoms

-        res_ids = []
-        [res_ids.append(resid_from_atom(a)) for a in atoms if not res_ids.count(resid_from_atom(a))]
+        # Group the atoms by their residue:
+        atoms_by_residue = {}
+        for a in atoms:
+            if a.element != 'H':
+                res_id = resid_from_atom(a)
+                try:
+                    atoms_by_residue[res_id].append(a)
+                except KeyError:
+                    atoms_by_residue[res_id] = [a]

-        for res_id in res_ids:
-            res_atoms = [a for a in atoms if resid_from_atom(a) == res_id and a.element != 'H']
+        for res_id, res_atoms in atoms_by_residue.items():
            resname = res_atoms[0].resName
            residue_label = '%3s%5s'%(resname, res_id)

--- a/propka/protonate.py
+++ b/propka/protonate.py
@@ -47,7 +47,9 @@ class Protonate:
                                  'As':5,
                                  'Se':6,
                                  'Br':7,
-                                  'Kr':8}
+                                  'Kr':8,
+                                  'I':7,
+                                }



--- a/setup.py
+++ b/setup.py
@@ -48,4 +48,5 @@ using the tutorial http://propka.ki.ku.dk/~luca/wiki/index.php/PROPKA_3.1_Tutori
            ],
        },
      zip_safe=True,
+      test_suite="Tests",
 )