Merge pull request #6 from schrodinger/titrate_only

Titrate only option
2014-08-27 14:53:54 +02:00
parent b617175f77 8084b31393
commit 6e1b0365e0
5 changed files with 114 additions and 31 deletions
--- a/Tests/runtest.py
+++ b/Tests/runtest.py
@@ -10,19 +10,37 @@ import os, re
 import sys

 if __name__ == "__main__":
-    pdbs = ['1FTJ-Chain-A',
-            '1HPX',
-            '4DFR']
+    # A list of input structures and command-line arguments to be passed in
+    # to PROPKA for each:
+    pdbs = [('1FTJ-Chain-A', []),
+            ('1HPX', []),
+            ('4DFR', []),
+            ('3SGB', []),
+            ('3SGB-subset', ['--titrate_only', 'E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139']),
+           ]

-    for pdb in pdbs:
+    for pdb, args in pdbs:
        print('')
        print('RUNNING '+pdb)

        # Run pka calculation
-        call([sys.executable, '../scripts/propka31.py','pdb/'+pdb+'.pdb'], stdout = open(pdb+'.out', 'w+'))
+        fh = open(pdb + '.out', 'w')
+        cmd = [sys.executable, '../scripts/propka31.py','pdb/%s.pdb' % pdb] + args
+        ret = call(cmd, stdout=fh, stderr=fh)
+        if ret != 0:
+            print(" ERR:")
+            print(" Failed to execute PROPKA on %s" % pdb)
+            print(" See: %s.out" % pdb)
+            sys.exit(1)

-        # Test pka predictiona
-        result = open('results/'+pdb+'.dat','r')
+        # Test pka predictions
+        result_file = 'results/%s.dat' % pdb
+        if not os.path.isfile(result_file):
+            print(" ERR:")
+            print(" file not found: %s" % result_file)
+            sys.exit(1)
+
+        result = open(result_file,'r')
        atpka = False
        for line in open(pdb+'.pka', 'r').readlines():
            if not atpka:
@@ -37,11 +55,13 @@ if __name__ == "__main__":
                atpka = False
                continue

-            r = float(result.readline())
+            expected_value = float(result.readline())
            m = re.search('([0-9]+\.[0-9]+)', line)
+            value = float(m.group(0))

-            if(float(m.group(0)) != r):
+            if value != expected_value:
                print(" ERR:")
                print(line)
-                print(" "+"should be: "+str(r))
+                print(" %s should be: %s" % (value, expected_value))
+                sys.exit(1)

--- a/propka/conformation_container.py
+++ b/propka/conformation_container.py
@@ -31,11 +31,13 @@ class Conformation_container:
        for atom in self.get_non_hydrogen_atoms():
            # has this atom been checked for groups?
            if atom.groups_extracted == 0:
-                self.validate_group(propka.group.is_group(self.parameters, atom))
-            # if the atom has been checked in a another conformation, check if it has a
-            # group that should be used in this conformation as well
-            elif atom.group:
-                self.validate_group(atom.group)
+                group = propka.group.is_group(self.parameters, atom)
+            else:
+                group = atom.group
+                # if the atom has been checked in a another conformation, check if it has a
+                # group that should be used in this conformation as well
+            if group:
+                self.setup_and_add_group(group)

        return

@@ -74,8 +76,7 @@ class Conformation_container:

    def find_covalently_coupled_groups(self):
        """ Finds covalently coupled groups and sets common charge centres if needed """
-        for group in [ group for group in self.groups if group.titratable]:
-
+        for group in self.get_titratable_groups():
            # Find covalently bonded groups
            bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, group.atom)

@@ -131,8 +132,9 @@ class Conformation_container:
        return res


-    def validate_group(self, group):
+    def setup_and_add_group(self, group):
        """ Checks if we want to include this group in the calculations """
+
        # Is it recognized as a group at all?
        if not group:
            return
@@ -140,18 +142,25 @@ class Conformation_container:
        # Other checks (include ligands, which chains etc.)

        # if all ok, accept the group
-        self.accept_group(group)
-        return
+        self.init_group(group)
+        self.groups.append(group)

-
-    def accept_group(self, group):
+    def init_group(self, group):
+        """
+        Initialize the given Group object.
+        """
        # set up the group
        group.parameters=self.parameters
        group.setup()

-        # and store it
-        self.groups.append(group)
-        return
+        # If --titrate_only option is set, make non-specified residues un-titratable:
+        titrate_only = self.molecular_container.options.titrate_only
+        if titrate_only is not None:
+            at = group.atom
+            if not (at.chainID, at.resNumb, at.icode) in titrate_only:
+                group.titratable = False
+                if group.residue_type == 'CYS':
+                    group.exclude_cys_from_results = True


    #
@@ -335,8 +344,14 @@ class Conformation_container:
    def get_titratable_groups(self):
        return [group for group in self.groups if group.titratable]

-    def get_titratable_groups_and_cysteine_bridges(self):
-        return [group for group in self.groups if group.titratable or group.residue_type == 'CYS']
+    def get_groups_for_calculations(self):
+        """
+        Returns a list of groups that should be included in results report.
+        If --titrate_only option is specified, only residues that are titratable
+        and are in that list are included; otherwise all titratable residues
+        and CYS residues are included.
+        """
+        return [group for group in self.groups if group.use_in_calculations()]

    def get_acids(self):
        return [group for group in self.groups if (group.residue_type in self.parameters.acid_list
--- a/propka/group.py
+++ b/propka/group.py
@@ -338,6 +338,7 @@ class Group:
        res.covalently_coupled_groups = self.covalently_coupled_groups
        res.non_covalently_coupled_groups = self.non_covalently_coupled_groups
        res.titratable = self.titratable
+        res.exclude_cys_from_results = self.exclude_cys_from_results
        res.charge = self.charge
        return res

@@ -367,7 +368,7 @@ class Group:

        if self.model_pka_set and not self.atom.cysteine_bridge:
            self.titratable = True
-
+        self.exclude_cys_from_results = False

        return

@@ -606,6 +607,15 @@ class Group:

        return charge

+    def use_in_calculations(self):
+        """
+        Whether this group should be included in the results report. If
+        --titrate_only option is specified, only residues that are titratable
+        and are in that list are included; otherwise all titratable residues
+        and CYS residues are included.
+        """
+        return self.titratable or (self.residue_type == 'CYS' and \
+                                   not self.exclude_cys_from_results)


 class COO_group(Group):
--- a/propka/lib.py
+++ b/propka/lib.py
@@ -75,6 +75,29 @@ def make_combination(combis, interaction):
    return res


+def parse_res_string(res_str):
+    """
+    Parse the residue string, in format "chain:resnum[inscode]", and return
+    a tuple of (chain, resnum, inscode). Raises ValueError if the input
+    string is invalid.
+    """
+    try:
+        chain, resnum_str = res_str.split(":")
+    except ValueError:
+        raise ValueError("Invalid residue string (must contain 2 colon-separated values)")
+    try:
+        resnum = int(resnum_str)
+    except ValueError:
+        try:
+            resnum = int(resnum_str[:-1])
+        except ValueError:
+            raise ValueError("Invalid residue number (not an int)")
+        else:
+            inscode = resnum_str[-1]
+    else:
+        inscode = " "
+    return chain, resnum, inscode
+

 def loadOptions():
    """
@@ -94,7 +117,11 @@ def loadOptions():
    parser.add_option("-r", "--reference", dest="reference", default="neutral",
           help="setting which reference to use for stability calculations [neutral/low-pH]")
    parser.add_option("-c", "--chain", action="append", dest="chains",
-           help="creating the protein with only a specified chain, note, chains without ID are labeled 'A' [all]")
+           help='creating the protein with only a specified chain. Specify " " for chains without ID [all]')
+    parser.add_option("-i", "--titrate_only", dest="titrate_only",
+           help='Treat only the specified residues as titratable. Value should '
+           'be a comma-separated list of "chain:resnum" values; for example: '
+           '-i "A:10,A:11"')
    parser.add_option("-t", "--thermophile", action="append", dest="thermophiles",
           help="defining a thermophile filename; usually used in 'alignment-mutations'")
    parser.add_option("-a", "--alignment", action="append", dest="alignment",
@@ -147,7 +174,17 @@ def loadOptions():
      print("Warning: no pdbfile provided")
      #sys.exit(9)

-
+    # Convert titrate_only string to a list of (chain, resnum) items:
+    if options.titrate_only is not None:
+        res_list = []
+        for res_str in options.titrate_only.split(','):
+            try:
+                chain, resnum, inscode = parse_res_string(res_str)
+            except ValueError:
+                print('Invalid residue string: "%s"' % res_str)
+                sys.exit(1)
+            res_list.append((chain, resnum, inscode))
+        options.titrate_only = res_list

    # done!
    return options, args
--- a/propka/molecular_container.py
+++ b/propka/molecular_container.py
@@ -139,7 +139,8 @@ class Molecular_container:
                                                                                parameters=self.conformations[self.conformation_names[0]].parameters,
                                                                                molecular_container=self)

-        for group in self.conformations[self.conformation_names[0]].get_titratable_groups_and_cysteine_bridges():
+        container = self.conformations[self.conformation_names[0]]
+        for group in container.get_groups_for_calculations():
            # new group to hold average values
            avr_group = group.clone()
            # sum up all groups ...