Merge pull request #3 from schrodinger/master

merge from schrodinger
2014-04-04 17:08:52 +02:00
parent d01659d614 ae462842c1
commit 54ebb22fb4
14 changed files with 278 additions and 77 deletions
--- a/Tests/init.py
+++ b/Tests/init.py
--- a/Tests/hybrid36.py
+++ b/Tests/hybrid36.py
@@ -0,0 +1,57 @@
 import unittest
 import propka.hybrid36 as hybrid36
 class Hybrid36Test(unittest.TestCase):
    def testDecode(self):
        test_values = {
            "99999": 99999,
            "A0000": 100000,
            "0": 0,
            "9": 9,
            "A": 10,
            "  ZZZZY": 43770014,
            "ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000
            "a0000": 43770016,
            "zzzzz": 87440031,
            "zzzzy": 87440030,
            "99": 99,
            "A0": 100,
            "ZZ": 1035,
            "zz": 1971,
            "-99999": -99999,
            "-A0000": -100000,
            "-0": 0,
            "-9": -9,
            "-A": -10,
            "-ZZZZY": -43770014,
            "-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000
            "-a0000": -43770016,
            "-zzzzz": -87440031,
            "-zzzzy": -87440030,
            "-99": -99,
            "-A0": -100,
            "-ZZ": -1035,
            "-zz": -1971,
            "PROPKA": 954495146,
            "A001Z": 100071,
            "B0000": 1779616,
        }
        for k, v in test_values.iteritems():
            self.assertEqual(hybrid36.decode(k), v)
    def testErrors(self):
        test_values = [
            "99X99",
            "X9-99",
            "XYZa",
            "",
            "-",
            "!NotOk",
        ]
        for v in test_values:
            with self.assertRaises(ValueError) as e:
                hybrid36.decode(v)
            self.assertTrue(v in str(e.exception))
--- a/Tests/pkacalc_test.py
+++ b/Tests/pkacalc_test.py
@@ -0,0 +1,68 @@
 import os
 import re
 from subprocess import call
 import sys
 import unittest
 # Setting this up as a direct translation of the original runtest.py script
 # that will be run as part of 'python setup.py test'. This takes on the
 # order of 10s to run.
 class SystemTest(unittest.TestCase):
    """
    Run the program and compare against reference results.
    """
    def test_pka_calc(self):
        pdbs = ['1FTJ-Chain-A',
                '1HPX',
                '4DFR']
        test_dir = os.path.dirname(__file__)
        base_dir = os.path.dirname(test_dir)
        executable = os.path.join(base_dir, "scripts", "propka31.py")
        env = { "PYTHONPATH" : base_dir }
        for pdb in pdbs:
            input_filename = os.path.join(test_dir, "pdb", pdb + ".pdb")
            output_filename = os.path.join(test_dir, pdb + ".out")
            output_file = open(output_filename, "w")
            call([sys.executable, executable, input_filename],
                    stdout=output_file, env=env)
            output_file.close()
            # Check pka predictions.
            ref = open(os.path.join(test_dir, "results", pdb + ".dat"))
            output = open(output_filename)
            atpka = False
            errors = []
            for line in output:
                if not atpka:
                    # Start testing pka values.
                    if "model-pKa" in line:
                        atpka = True
                    continue
                m = re.search('([0-9]+\.[0-9]+)', line)
                if not m:
                    break
                expected_value = float(ref.readline())
                value = float(m.group(0))
                if(value != expected_value):
                    identity = line[:m.start()].strip()
                    errors.append("%12s  %8.2f    %8.2f" %
                            (identity, expected_value, value))
            ref.close()
            output.close()
            if errors:
                error_header = "       Group  Expected  Calculated\n"
                self.Fail("Unexpected pKa values:\n" + error_header +
                          "\n".join(errors))
--- a/Tests/runtest.py
+++ b/Tests/runtest.py
@@ -9,11 +9,12 @@ from subprocess import call
 import os, re
 import sys
-pdbs = ['1FTJ-Chain-A',
+if __name__ == "__main__":
    pdbs = ['1FTJ-Chain-A',
            '1HPX',
            '4DFR']
-for pdb in pdbs:
+    for pdb in pdbs:
        print('')
        print('RUNNING '+pdb)
@@ -44,8 +45,3 @@ for pdb in pdbs:
                print(line)
                print(" "+"should be: "+str(r))
--- a/propka/atom.py
+++ b/propka/atom.py
@@ -4,6 +4,7 @@ from __future__ import print_function
 import string, propka.lib, propka.group
 from . import hybrid36
 class Atom:
    """
@@ -68,13 +69,16 @@ class Atom:
        if line:
            self.name = line[12:16].strip()
-            self.numb = int( line[ 6:11].strip() )
+            self.numb = int( hybrid36.decode(line[ 6:11]) )
            self.x = float( line[30:38].strip() )
            self.y = float( line[38:46].strip() )
            self.z = float( line[46:54].strip() )
            self.resNumb = int( line[22:26].strip() )
            self.resName = "%-3s" % (line[17:20].strip())
-            self.chainID = max(line[21], 'A') # set chain id to A in case of white space
+            self.chainID = line[21]
            # Set chain id to "_" if it is just white space.
            if not self.chainID.strip():
                self.chainID = '_'
            self.type = line[:6].strip().lower()
            if self.resName in ['DA ','DC ','DG ','DT ']:
--- a/propka/bonds.py
+++ b/propka/bonds.py
@@ -354,8 +354,9 @@ class bondmaker:
        #print('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen))
        # how many boxes do we need in each dimension?
-        # NOTE: math.ceil() returns an int in python3 and a float in python2,
+        # NOTE: math.ceil() returns an int in python3 and a float in
-        # so we need to cast it to int for range() to work.
+        # python2, so we need to convert it to an int for range() to work in
        # both versions. See PEP 3141.
        self.no_box_x = max(1, int(math.ceil(xlen/box_size)))
        self.no_box_y = max(1, int(math.ceil(ylen/box_size)))
        self.no_box_z = max(1, int(math.ceil(zlen/box_size)))
@@ -369,7 +370,7 @@ class bondmaker:
        for x in range(self.no_box_x):
            for y in range(self.no_box_y):
                for z in range(self.no_box_z):
-                    self.boxes[self.box_key(x,y,z)] = []
+                    self.boxes[(x,y,z)] = []
        # put atoms into boxes
        for atom in atoms:
@@ -379,10 +380,8 @@ class bondmaker:
            self.put_atom_in_box(x,y,z,atom)
        # assign bonds
-        keys = self.boxes.keys()
+        for key, value in self.boxes.items():
-        for key in keys:
+            self.find_bonds_for_atoms(value)
            self.find_bonds_for_atoms(self.boxes[key])
        return
@@ -390,21 +389,21 @@ class bondmaker:
    def put_atom_in_box(self,x,y,z,atom):
        # atom in the x,y,z box and the up to 7 neighboring boxes on
        # one side of the x,y,z box in each dimension
        for bx in [x,x+1]:
            for by in [y,y+1]:
                for bz in [z,z+1]:
-                    key = self.box_key(bx,by,bz)
+                    key = (bx,by,bz)
-                    if key in self.boxes.keys():
+                    try:
                        self.boxes[key].append(atom)
                    except KeyError:
                        # No box exists for this coordinate
                        pass
                        #print(atom,'->',key,':',len(self.boxes[key]))
        return
    def box_key(self, x, y, z):
        return '%d-%d-%d'%(x,y,z)
    def has_bond(self, atom1, atom2):
        if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms:
            return True
--- a/propka/calculations.py
+++ b/propka/calculations.py
@@ -364,12 +364,11 @@ def radial_volume_desolvation(parameters, group):
        # desolvation
        if sq_dist < parameters.desolv_cutoff_squared:
            # use a default relative volume of 1.0 if the volume of the element is not found in parameters
            dv = 1.0
            if atom.element in parameters.VanDerWaalsVolume.keys():
                dv = parameters.VanDerWaalsVolume[atom.element]
            # insert check for methyl groups
            if atom.element == 'C' and atom.name not in ['CA','C']:
                dv = parameters.VanDerWaalsVolume['C4']
            else:
                dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0)
            dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist)
 #            dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared)
--- a/propka/conformation_container.py
+++ b/propka/conformation_container.py
@@ -390,19 +390,12 @@ class Conformation_container:
    def top_up(self, other):
        """ Tops up self with all atoms found in other but not in self """
        my_residue_labels = { a.residue_label for a in self.atoms }
        for atom in other.atoms:
-            if not self.have_atom(atom):
+            if not atom.residue_label in my_residue_labels:
                self.copy_atom(atom)
        return
    def have_atom(self, atom):
        res = False
        for a in self.atoms:
            if a.residue_label == atom.residue_label:
                res = True
                break
        return res
    def find_group(self, group):
        for g in self.groups:
            if g.atom.residue_label == group.atom.residue_label:
--- a/propka/determinants.py
+++ b/propka/determinants.py
@@ -168,11 +168,16 @@ def setIonDeterminants(conformation_container, version):
 def setBackBoneDeterminants(titratable_groups, backbone_groups, version):
    for titratable_group in titratable_groups:
        titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids
        if not titratable_group_interaction_atoms:
            continue
        # find out which backbone groups this titratable is interacting with
        for backbone_group in backbone_groups:
            # find the interacting atoms
            backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group)
-            titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids
+            if not backbone_interaction_atoms:
                continue
            # find the smallest distance
            [backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms,
--- a/propka/group.py
+++ b/propka/group.py
@@ -420,6 +420,9 @@ class Group:
            return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0
    def set_center(self, atoms):
        if not atoms:
            raise ValueError("At least one atom must be specified")
        # reset center
        self.x = 0.0; self.y = 0.0; self.z = 0.0
@@ -614,8 +617,14 @@ class COO_group(Group):
        # Identify the two caroxyl oxygen atoms
        the_oxygens = self.atom.get_bonded_elements('O')
-        # set the center using the two oxygen carboxyl atoms
+        # set the center using the two oxygen carboxyl atoms (if present)
        if the_oxygens:
            self.set_center(the_oxygens)
        else:
            self.set_center([self.atom])
            # FIXME perhaps it would be better to ignore this group completely
            # if the oxygen is missing from this residue?
        self.set_interaction_atoms(the_oxygens, the_oxygens)
        return
@@ -636,7 +645,12 @@ class HIS_group(Group):
            my_protonator.protonate_atom(r)
        # set the center using the ring atoms
        if ring_atoms:
            self.set_center(ring_atoms)
        else:
            # Missing side-chain atoms
            self.set_center([self.atom])
            # FIXME perhaps it would be better to ignore this group completely?
        # find the hydrogens on the ring-nitrogens
        hydrogens = []
@@ -751,8 +765,13 @@ class Cterm_group(Group):
    def setup_atoms(self):
        # Identify the carbon and other oxygen carboxyl atoms
-        the_carbon = self.atom.get_bonded_elements('C')
+        the_carbons = self.atom.get_bonded_elements('C')
-        the_other_oxygen = the_carbon[0].get_bonded_elements('O')
+        if not the_carbons:
            self.set_center([self.atom])
            # FIXME perhaps it would be better to ignore this group completely
            # if the carbon is missing from this residue?
        else:
            the_other_oxygen = the_carbons[0].get_bonded_elements('O')
            the_other_oxygen.remove(self.atom)
            # set the center and interaction atoms
--- a/propka/hybrid36.py
+++ b/propka/hybrid36.py
@@ -0,0 +1,52 @@
 import string
 _hybrid36_upper_chars = set(string.ascii_uppercase)
 _hybrid36_lower_chars = set(string.ascii_lowercase)
 _hybrid36_digits = set(string.digits)
 _hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
 _hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits
 def decode(input_string):
    """
    Convert an input string of a number in hybrid-36 format to an integer.
    """
    value_error_message = "invalid literal for hybrid-36 conversion: '%s'"
    original_input_string = input_string
    input_string = input_string.strip()
    # Manually handle negative sign.
    if input_string.startswith("-"):
        sign = -1
        input_string = input_string[1:]
    else:
        sign = 1
    if not len(input_string):
        raise ValueError(value_error_message % input_string)
    # See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
    num_chars = len(input_string)
    first_char = input_string[0]
    if first_char in _hybrid36_digits:
        return sign * int(input_string)
    elif first_char in _hybrid36_upper_chars:
        reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
        _hybrid36_set = _hybrid36_upper_set
    elif first_char in _hybrid36_lower_chars:
        reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
        _hybrid36_set = _hybrid36_lower_set
    else:
        raise ValueError(value_error_message % original_input_string)
    # Check the validity of the input string: ASCII characters should be
    # either all uppercase or all lowercase.
    for c in input_string[1:]:
        if c not in _hybrid36_set:
            raise ValueError(value_error_message % original_input_string)
    # Convert with the int function.
    return sign * (int(input_string, 36) + reference)
--- a/propka/pdb.py
+++ b/propka/pdb.py
@@ -50,11 +50,17 @@ def protein_precheck(conformations, names):
    for name in names:
        atoms = conformations[name].atoms
-        res_ids = []
+        # Group the atoms by their residue:
-        [res_ids.append(resid_from_atom(a)) for a in atoms if not res_ids.count(resid_from_atom(a))]
+        atoms_by_residue = {}
        for a in atoms:
            if a.element != 'H':
                res_id = resid_from_atom(a)
                try:
                    atoms_by_residue[res_id].append(a)
                except KeyError:
                    atoms_by_residue[res_id] = [a]
-        for res_id in res_ids:
+        for res_id, res_atoms in atoms_by_residue.items():
            res_atoms = [a for a in atoms if resid_from_atom(a) == res_id and a.element != 'H']
            resname = res_atoms[0].resName
            residue_label = '%3s%5s'%(resname, res_id)
--- a/propka/protonate.py
+++ b/propka/protonate.py
@@ -47,7 +47,9 @@ class Protonate:
                                  'As':5,
                                  'Se':6,
                                  'Br':7,
-                                  'Kr':8}
+                                  'Kr':8,
                                  'I':7,
                                }
--- a/setup.py
+++ b/setup.py
@@ -48,4 +48,5 @@ using the tutorial http://propka.ki.ku.dk/~luca/wiki/index.php/PROPKA_3.1_Tutori
            ],
        },
      zip_safe=True,
      test_suite="Tests",
 )