handle structures with 100,000 or more atoms

- the PDB format reserves only a 5-character field for the atom serial number,
  which overflows at 100,000 atoms; the hybrid-36 encoding fits serial numbers
  up to 87,440,031 into the same 5 characters
This commit is contained in:
Mike Beachy
2014-02-22 17:42:49 -05:00
parent 61bb875248
commit e0ed5da44b
3 changed files with 111 additions and 1 deletions

57
Tests/hybrid36.py Normal file
View File

@@ -0,0 +1,57 @@
import unittest
import propka.hybrid36 as hybrid36
class Hybrid36Test(unittest.TestCase):
    """Unit tests for ``propka.hybrid36.decode``."""

    def testDecode(self):
        """Valid hybrid-36 strings decode to the expected integers."""
        test_values = {
            # Plain decimal, then the first value that needs a letter.
            "99999": 99999,
            "A0000": 100000,
            "0": 0,
            "9": 9,
            "A": 10,
            # Leading whitespace must be ignored by decode().
            " ZZZZY": 43770014,
            "ZZZZZ": 43770015,  # ZZZZZ - A0000 + 100000
            # The lowercase range continues where the uppercase range ends.
            "a0000": 43770016,
            "zzzzz": 87440031,
            "zzzzy": 87440030,
            # Widths other than five characters.
            "99": 99,
            "A0": 100,
            "ZZ": 1035,
            "zz": 1971,
            # Negative counterparts of all of the above.
            "-99999": -99999,
            "-A0000": -100000,
            "-0": 0,
            "-9": -9,
            "-A": -10,
            "-ZZZZY": -43770014,
            "-ZZZZZ": -43770015,  # ZZZZZ - A0000 + 100000
            "-a0000": -43770016,
            "-zzzzz": -87440031,
            "-zzzzy": -87440030,
            "-99": -99,
            "-A0": -100,
            "-ZZ": -1035,
            "-zz": -1971,
            # Miscellaneous spot checks.
            "PROPKA": 954495146,
            "A001Z": 100071,
            "B0000": 1779616,
        }

        # dict.items() exists on both Python 2 and Python 3, whereas
        # dict.iteritems() is Python 2 only and raises AttributeError on 3.
        for k, v in test_values.items():
            self.assertEqual(hybrid36.decode(k), v)

    def testErrors(self):
        """Malformed strings raise ValueError mentioning the bad input."""
        test_values = [
            "99X99",   # letter inside a decimal number
            "X9-99",   # sign in the middle
            "XYZa",    # mixed upper and lower case
            "",        # empty
            "-",       # sign only
            "!NotOk",  # invalid leading character
        ]

        for v in test_values:
            with self.assertRaises(ValueError) as e:
                hybrid36.decode(v)
            self.assertIn(v, str(e.exception))

View File

@@ -4,6 +4,7 @@ from __future__ import print_function
import string, propka.lib, propka.group
from . import hybrid36
class Atom:
"""
@@ -68,7 +69,7 @@ class Atom:
if line:
self.name = line[12:16].strip()
self.numb = int( line[ 6:11].strip() )
self.numb = int( hybrid36.decode(line[ 6:11]) )
self.x = float( line[30:38].strip() )
self.y = float( line[38:46].strip() )
self.z = float( line[46:54].strip() )

52
propka/hybrid36.py Normal file
View File

@@ -0,0 +1,52 @@
import string
# Alphabets used by the hybrid-36 encoding.  A number is written either in
# plain decimal, or in base 36 using digits plus exclusively uppercase OR
# exclusively lowercase letters.
_hybrid36_upper_chars = set(string.ascii_uppercase)
_hybrid36_lower_chars = set(string.ascii_lowercase)
_hybrid36_digits = set(string.digits)
_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits


def decode(input_string):
    """
    Convert an input string of a number in hybrid-36 format to an integer.

    Surrounding whitespace is ignored and a single leading "-" negates the
    result.  The first remaining character selects the encoding:

    * an ASCII digit: the string is a plain base-10 number;
    * an uppercase letter: base 36 over digits and uppercase letters,
      offset so that the smallest such string of a given width follows the
      largest decimal string of that width (e.g. "A0000" == 100000);
    * a lowercase letter: base 36 over digits and lowercase letters,
      offset so that it continues after the uppercase range
      (e.g. "a0000" == 43770016).

    Raises ValueError, quoting the string exactly as it was passed in, if
    the input is empty, consists only of a sign, or contains a character
    that is not valid for the encoding selected by its first character.
    """
    value_error_message = "invalid literal for hybrid-36 conversion: '%s'"

    original_input_string = input_string
    input_string = input_string.strip()

    # Manually handle negative sign.
    if input_string.startswith("-"):
        sign = -1
        input_string = input_string[1:]
    else:
        sign = 1

    # Report the caller's original text (not the stripped, sign-less
    # remainder) so the message identifies what was actually passed in.
    if not input_string:
        raise ValueError(value_error_message % original_input_string)

    # See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
    num_chars = len(input_string)
    first_char = input_string[0]

    if first_char in _hybrid36_digits:
        base = 10
        reference = 0
        allowed_chars = _hybrid36_digits
    elif first_char in _hybrid36_upper_chars:
        base = 36
        reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
        allowed_chars = _hybrid36_upper_set
    elif first_char in _hybrid36_lower_chars:
        base = 36
        reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
        allowed_chars = _hybrid36_lower_set
    else:
        raise ValueError(value_error_message % original_input_string)

    # Check the validity of the remaining characters ourselves instead of
    # relying on int(): int() is more permissive than the format (it accepts
    # "_" separators and non-ASCII digits on Python 3, and mixed case in
    # base 36) and would otherwise report errors with a different message.
    for c in input_string[1:]:
        if c not in allowed_chars:
            raise ValueError(value_error_message % original_input_string)

    # Convert with the int function.
    return sign * (int(input_string, base) + reference)