Merge pull request #3 from schrodinger/master

merge from schrodinger
This commit is contained in:
Jimmy Charnley Kromann
2014-04-04 17:08:52 +02:00
14 changed files with 278 additions and 77 deletions

0
Tests/__init__.py Normal file
View File

57
Tests/hybrid36.py Normal file
View File

@@ -0,0 +1,57 @@
import unittest
import propka.hybrid36 as hybrid36
class Hybrid36Test(unittest.TestCase):
def testDecode(self):
test_values = {
"99999": 99999,
"A0000": 100000,
"0": 0,
"9": 9,
"A": 10,
" ZZZZY": 43770014,
"ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000
"a0000": 43770016,
"zzzzz": 87440031,
"zzzzy": 87440030,
"99": 99,
"A0": 100,
"ZZ": 1035,
"zz": 1971,
"-99999": -99999,
"-A0000": -100000,
"-0": 0,
"-9": -9,
"-A": -10,
"-ZZZZY": -43770014,
"-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000
"-a0000": -43770016,
"-zzzzz": -87440031,
"-zzzzy": -87440030,
"-99": -99,
"-A0": -100,
"-ZZ": -1035,
"-zz": -1971,
"PROPKA": 954495146,
"A001Z": 100071,
"B0000": 1779616,
}
for k, v in test_values.iteritems():
self.assertEqual(hybrid36.decode(k), v)
def testErrors(self):
test_values = [
"99X99",
"X9-99",
"XYZa",
"",
"-",
"!NotOk",
]
for v in test_values:
with self.assertRaises(ValueError) as e:
hybrid36.decode(v)
self.assertTrue(v in str(e.exception))

68
Tests/pkacalc_test.py Normal file
View File

@@ -0,0 +1,68 @@
import os
import re
from subprocess import call
import sys
import unittest
# Setting this up as a direct translation of the original runtest.py script
# that will be run as part of 'python setup.py test'. This takes on the
# order of 10s to run.
class SystemTest(unittest.TestCase):
"""
Run the program and compare against reference results.
"""
def test_pka_calc(self):
pdbs = ['1FTJ-Chain-A',
'1HPX',
'4DFR']
test_dir = os.path.dirname(__file__)
base_dir = os.path.dirname(test_dir)
executable = os.path.join(base_dir, "scripts", "propka31.py")
env = { "PYTHONPATH" : base_dir }
for pdb in pdbs:
input_filename = os.path.join(test_dir, "pdb", pdb + ".pdb")
output_filename = os.path.join(test_dir, pdb + ".out")
output_file = open(output_filename, "w")
call([sys.executable, executable, input_filename],
stdout=output_file, env=env)
output_file.close()
# Check pka predictions.
ref = open(os.path.join(test_dir, "results", pdb + ".dat"))
output = open(output_filename)
atpka = False
errors = []
for line in output:
if not atpka:
# Start testing pka values.
if "model-pKa" in line:
atpka = True
continue
m = re.search('([0-9]+\.[0-9]+)', line)
if not m:
break
expected_value = float(ref.readline())
value = float(m.group(0))
if(value != expected_value):
identity = line[:m.start()].strip()
errors.append("%12s %8.2f %8.2f" %
(identity, expected_value, value))
ref.close()
output.close()
if errors:
error_header = " Group Expected Calculated\n"
self.Fail("Unexpected pKa values:\n" + error_header +
"\n".join(errors))

View File

@@ -9,11 +9,12 @@ from subprocess import call
import os, re import os, re
import sys import sys
pdbs = ['1FTJ-Chain-A', if __name__ == "__main__":
pdbs = ['1FTJ-Chain-A',
'1HPX', '1HPX',
'4DFR'] '4DFR']
for pdb in pdbs: for pdb in pdbs:
print('') print('')
print('RUNNING '+pdb) print('RUNNING '+pdb)
@@ -44,8 +45,3 @@ for pdb in pdbs:
print(line) print(line)
print(" "+"should be: "+str(r)) print(" "+"should be: "+str(r))

View File

@@ -4,6 +4,7 @@ from __future__ import print_function
import string, propka.lib, propka.group import string, propka.lib, propka.group
from . import hybrid36
class Atom: class Atom:
""" """
@@ -68,13 +69,16 @@ class Atom:
if line: if line:
self.name = line[12:16].strip() self.name = line[12:16].strip()
self.numb = int( line[ 6:11].strip() ) self.numb = int( hybrid36.decode(line[ 6:11]) )
self.x = float( line[30:38].strip() ) self.x = float( line[30:38].strip() )
self.y = float( line[38:46].strip() ) self.y = float( line[38:46].strip() )
self.z = float( line[46:54].strip() ) self.z = float( line[46:54].strip() )
self.resNumb = int( line[22:26].strip() ) self.resNumb = int( line[22:26].strip() )
self.resName = "%-3s" % (line[17:20].strip()) self.resName = "%-3s" % (line[17:20].strip())
self.chainID = max(line[21], 'A') # set chain id to A in case of white space self.chainID = line[21]
# Set chain id to "_" if it is just white space.
if not self.chainID.strip():
self.chainID = '_'
self.type = line[:6].strip().lower() self.type = line[:6].strip().lower()
if self.resName in ['DA ','DC ','DG ','DT ']: if self.resName in ['DA ','DC ','DG ','DT ']:

View File

@@ -354,8 +354,9 @@ class bondmaker:
#print('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen)) #print('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen))
# how many boxes do we need in each dimension? # how many boxes do we need in each dimension?
# NOTE: math.ceil() returns an int in python3 and a float in python2, # NOTE: math.ceil() returns an int in python3 and a float in
# so we need to cast it to int for range() to work. # python2, so we need to convert it to an int for range() to work in
# both versions. See PEP 3141.
self.no_box_x = max(1, int(math.ceil(xlen/box_size))) self.no_box_x = max(1, int(math.ceil(xlen/box_size)))
self.no_box_y = max(1, int(math.ceil(ylen/box_size))) self.no_box_y = max(1, int(math.ceil(ylen/box_size)))
self.no_box_z = max(1, int(math.ceil(zlen/box_size))) self.no_box_z = max(1, int(math.ceil(zlen/box_size)))
@@ -369,7 +370,7 @@ class bondmaker:
for x in range(self.no_box_x): for x in range(self.no_box_x):
for y in range(self.no_box_y): for y in range(self.no_box_y):
for z in range(self.no_box_z): for z in range(self.no_box_z):
self.boxes[self.box_key(x,y,z)] = [] self.boxes[(x,y,z)] = []
# put atoms into boxes # put atoms into boxes
for atom in atoms: for atom in atoms:
@@ -379,10 +380,8 @@ class bondmaker:
self.put_atom_in_box(x,y,z,atom) self.put_atom_in_box(x,y,z,atom)
# assign bonds # assign bonds
keys = self.boxes.keys() for key, value in self.boxes.items():
for key in keys: self.find_bonds_for_atoms(value)
self.find_bonds_for_atoms(self.boxes[key])
return return
@@ -390,21 +389,21 @@ class bondmaker:
def put_atom_in_box(self,x,y,z,atom): def put_atom_in_box(self,x,y,z,atom):
# atom in the x,y,z box and the up to 7 neighboring boxes on # atom in the x,y,z box and the up to 7 neighboring boxes on
# one side of the x,y,z box in each dimension # one side of the x,y,z box in each dimension
for bx in [x,x+1]: for bx in [x,x+1]:
for by in [y,y+1]: for by in [y,y+1]:
for bz in [z,z+1]: for bz in [z,z+1]:
key = self.box_key(bx,by,bz) key = (bx,by,bz)
if key in self.boxes.keys(): try:
self.boxes[key].append(atom) self.boxes[key].append(atom)
except KeyError:
# No box exists for this coordinate
pass
#print(atom,'->',key,':',len(self.boxes[key])) #print(atom,'->',key,':',len(self.boxes[key]))
return return
def box_key(self, x, y, z):
return '%d-%d-%d'%(x,y,z)
def has_bond(self, atom1, atom2): def has_bond(self, atom1, atom2):
if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms: if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms:
return True return True

View File

@@ -364,12 +364,11 @@ def radial_volume_desolvation(parameters, group):
# desolvation # desolvation
if sq_dist < parameters.desolv_cutoff_squared: if sq_dist < parameters.desolv_cutoff_squared:
# use a default relative volume of 1.0 if the volume of the element is not found in parameters # use a default relative volume of 1.0 if the volume of the element is not found in parameters
dv = 1.0
if atom.element in parameters.VanDerWaalsVolume.keys():
dv = parameters.VanDerWaalsVolume[atom.element]
# insert check for methyl groups # insert check for methyl groups
if atom.element == 'C' and atom.name not in ['CA','C']: if atom.element == 'C' and atom.name not in ['CA','C']:
dv = parameters.VanDerWaalsVolume['C4'] dv = parameters.VanDerWaalsVolume['C4']
else:
dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0)
dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist) dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist)
# dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared) # dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared)

View File

@@ -390,19 +390,12 @@ class Conformation_container:
def top_up(self, other): def top_up(self, other):
""" Tops up self with all atoms found in other but not in self """ """ Tops up self with all atoms found in other but not in self """
my_residue_labels = { a.residue_label for a in self.atoms }
for atom in other.atoms: for atom in other.atoms:
if not self.have_atom(atom): if not atom.residue_label in my_residue_labels:
self.copy_atom(atom) self.copy_atom(atom)
return return
def have_atom(self, atom):
res = False
for a in self.atoms:
if a.residue_label == atom.residue_label:
res = True
break
return res
def find_group(self, group): def find_group(self, group):
for g in self.groups: for g in self.groups:
if g.atom.residue_label == group.atom.residue_label: if g.atom.residue_label == group.atom.residue_label:

View File

@@ -168,11 +168,16 @@ def setIonDeterminants(conformation_container, version):
def setBackBoneDeterminants(titratable_groups, backbone_groups, version): def setBackBoneDeterminants(titratable_groups, backbone_groups, version):
for titratable_group in titratable_groups: for titratable_group in titratable_groups:
titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids
if not titratable_group_interaction_atoms:
continue
# find out which backbone groups this titratable is interacting with # find out which backbone groups this titratable is interacting with
for backbone_group in backbone_groups: for backbone_group in backbone_groups:
# find the interacting atoms # find the interacting atoms
backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group) backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group)
titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids if not backbone_interaction_atoms:
continue
# find the smallest distance # find the smallest distance
[backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms, [backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms,

View File

@@ -420,6 +420,9 @@ class Group:
return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0 return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0
def set_center(self, atoms): def set_center(self, atoms):
if not atoms:
raise ValueError("At least one atom must be specified")
# reset center # reset center
self.x = 0.0; self.y = 0.0; self.z = 0.0 self.x = 0.0; self.y = 0.0; self.z = 0.0
@@ -614,8 +617,14 @@ class COO_group(Group):
# Identify the two caroxyl oxygen atoms # Identify the two caroxyl oxygen atoms
the_oxygens = self.atom.get_bonded_elements('O') the_oxygens = self.atom.get_bonded_elements('O')
# set the center using the two oxygen carboxyl atoms # set the center using the two oxygen carboxyl atoms (if present)
if the_oxygens:
self.set_center(the_oxygens) self.set_center(the_oxygens)
else:
self.set_center([self.atom])
# FIXME perhaps it would be better to ignore this group completely
# if the oxygen is missing from this residue?
self.set_interaction_atoms(the_oxygens, the_oxygens) self.set_interaction_atoms(the_oxygens, the_oxygens)
return return
@@ -636,7 +645,12 @@ class HIS_group(Group):
my_protonator.protonate_atom(r) my_protonator.protonate_atom(r)
# set the center using the ring atoms # set the center using the ring atoms
if ring_atoms:
self.set_center(ring_atoms) self.set_center(ring_atoms)
else:
# Missing side-chain atoms
self.set_center([self.atom])
# FIXME perhaps it would be better to ignore this group completely?
# find the hydrogens on the ring-nitrogens # find the hydrogens on the ring-nitrogens
hydrogens = [] hydrogens = []
@@ -751,8 +765,13 @@ class Cterm_group(Group):
def setup_atoms(self): def setup_atoms(self):
# Identify the carbon and other oxygen carboxyl atoms # Identify the carbon and other oxygen carboxyl atoms
the_carbon = self.atom.get_bonded_elements('C') the_carbons = self.atom.get_bonded_elements('C')
the_other_oxygen = the_carbon[0].get_bonded_elements('O') if not the_carbons:
self.set_center([self.atom])
# FIXME perhaps it would be better to ignore this group completely
# if the carbon is missing from this residue?
else:
the_other_oxygen = the_carbons[0].get_bonded_elements('O')
the_other_oxygen.remove(self.atom) the_other_oxygen.remove(self.atom)
# set the center and interaction atoms # set the center and interaction atoms

52
propka/hybrid36.py Normal file
View File

@@ -0,0 +1,52 @@
import string
_hybrid36_upper_chars = set(string.ascii_uppercase)
_hybrid36_lower_chars = set(string.ascii_lowercase)
_hybrid36_digits = set(string.digits)
_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits
def decode(input_string):
"""
Convert an input string of a number in hybrid-36 format to an integer.
"""
value_error_message = "invalid literal for hybrid-36 conversion: '%s'"
original_input_string = input_string
input_string = input_string.strip()
# Manually handle negative sign.
if input_string.startswith("-"):
sign = -1
input_string = input_string[1:]
else:
sign = 1
if not len(input_string):
raise ValueError(value_error_message % input_string)
# See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
num_chars = len(input_string)
first_char = input_string[0]
if first_char in _hybrid36_digits:
return sign * int(input_string)
elif first_char in _hybrid36_upper_chars:
reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
_hybrid36_set = _hybrid36_upper_set
elif first_char in _hybrid36_lower_chars:
reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
_hybrid36_set = _hybrid36_lower_set
else:
raise ValueError(value_error_message % original_input_string)
# Check the validity of the input string: ASCII characters should be
# either all uppercase or all lowercase.
for c in input_string[1:]:
if c not in _hybrid36_set:
raise ValueError(value_error_message % original_input_string)
# Convert with the int function.
return sign * (int(input_string, 36) + reference)

View File

@@ -50,11 +50,17 @@ def protein_precheck(conformations, names):
for name in names: for name in names:
atoms = conformations[name].atoms atoms = conformations[name].atoms
res_ids = [] # Group the atoms by their residue:
[res_ids.append(resid_from_atom(a)) for a in atoms if not res_ids.count(resid_from_atom(a))] atoms_by_residue = {}
for a in atoms:
if a.element != 'H':
res_id = resid_from_atom(a)
try:
atoms_by_residue[res_id].append(a)
except KeyError:
atoms_by_residue[res_id] = [a]
for res_id in res_ids: for res_id, res_atoms in atoms_by_residue.items():
res_atoms = [a for a in atoms if resid_from_atom(a) == res_id and a.element != 'H']
resname = res_atoms[0].resName resname = res_atoms[0].resName
residue_label = '%3s%5s'%(resname, res_id) residue_label = '%3s%5s'%(resname, res_id)

View File

@@ -47,7 +47,9 @@ class Protonate:
'As':5, 'As':5,
'Se':6, 'Se':6,
'Br':7, 'Br':7,
'Kr':8} 'Kr':8,
'I':7,
}

View File

@@ -48,4 +48,5 @@ using the tutorial http://propka.ki.ku.dk/~luca/wiki/index.php/PROPKA_3.1_Tutori
], ],
}, },
zip_safe=True, zip_safe=True,
test_suite="Tests",
) )