handle structures with 100,000 or more atoms
- the PDB format reserves a fixed 5-character field for the atom serial number, so plain decimal numbering stops at 99,999; the hybrid-36 encoding reuses the same 5 characters to number up to 87,440,031 atoms
This commit is contained in:
57
Tests/hybrid36.py
Normal file
57
Tests/hybrid36.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import unittest
|
||||
|
||||
import propka.hybrid36 as hybrid36
|
||||
|
||||
class Hybrid36Test(unittest.TestCase):
    """Unit tests for ``hybrid36.decode``."""

    def testDecode(self):
        """Check that valid hybrid-36 strings decode to the expected integers.

        Covers plain decimal values, upper- and lower-case encoded values,
        several field widths, surrounding whitespace and negative numbers.
        """
        test_values = {
            "99999": 99999,
            "A0000": 100000,
            "0": 0,
            "9": 9,
            "A": 10,
            " ZZZZY": 43770014,
            "ZZZZZ": 43770015,  # ZZZZZ - A0000 + 100000
            "a0000": 43770016,
            "zzzzz": 87440031,
            "zzzzy": 87440030,
            "99": 99,
            "A0": 100,
            "ZZ": 1035,
            "zz": 1971,
            "-99999": -99999,
            "-A0000": -100000,
            "-0": 0,
            "-9": -9,
            "-A": -10,
            "-ZZZZY": -43770014,
            "-ZZZZZ": -43770015,  # ZZZZZ - A0000 + 100000
            "-a0000": -43770016,
            "-zzzzz": -87440031,
            "-zzzzy": -87440030,
            "-99": -99,
            "-A0": -100,
            "-ZZ": -1035,
            "-zz": -1971,
            "PROPKA": 954495146,
            "A001Z": 100071,
            "B0000": 1779616,
        }

        # dict.items() exists on both Python 2 and Python 3; iteritems() is
        # Python 2 only and would make this test error out on Python 3.
        for k, v in test_values.items():
            self.assertEqual(
                hybrid36.decode(k), v,
                "decoding %r did not give %d" % (k, v))

    def testErrors(self):
        """Check that malformed strings raise ValueError naming the input."""
        test_values = [
            "99X99",
            "X9-99",
            "XYZa",
            "",
            "-",
            "!NotOk",
        ]

        for v in test_values:
            with self.assertRaises(ValueError) as e:
                hybrid36.decode(v)
            # The error message is expected to quote the offending input.
            self.assertIn(v, str(e.exception))
|
||||
@@ -4,6 +4,7 @@ from __future__ import print_function
|
||||
|
||||
import string, propka.lib, propka.group
|
||||
|
||||
from . import hybrid36
|
||||
|
||||
class Atom:
|
||||
"""
|
||||
@@ -68,7 +69,7 @@ class Atom:
|
||||
|
||||
if line:
|
||||
self.name = line[12:16].strip()
|
||||
self.numb = int( line[ 6:11].strip() )
|
||||
self.numb = int( hybrid36.decode(line[ 6:11]) )
|
||||
self.x = float( line[30:38].strip() )
|
||||
self.y = float( line[38:46].strip() )
|
||||
self.z = float( line[46:54].strip() )
|
||||
|
||||
52
propka/hybrid36.py
Normal file
52
propka/hybrid36.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import string
|
||||
|
||||
# Character classes used to validate hybrid-36 strings.  Only ASCII
# characters are accepted.
_hybrid36_upper_chars = set(string.ascii_uppercase)
_hybrid36_lower_chars = set(string.ascii_lowercase)
_hybrid36_digits = set(string.digits)
_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits


def decode(input_string):
    """
    Convert an input string of a number in hybrid-36 format to an integer.

    The kind of the first character selects the interpretation of a string
    of ``n`` characters:

    * an ASCII digit: the string is a plain base-10 number;
    * an upper-case letter: the string is a base-36 number, shifted so that
      ``"A" + "0" * (n - 1)`` decodes to ``10 ** n``;
    * a lower-case letter: the string is a base-36 number, shifted so that
      ``"a" + "0" * (n - 1)`` decodes to one more than ``"Z" * n``.

    Surrounding whitespace is ignored and a single leading ``-`` negates the
    result.  See http://cci.lbl.gov/hybrid_36/ for documentation on the
    format.

    :param input_string: the text to decode, e.g. a PDB atom-number field.
    :returns: the decoded integer.
    :raises ValueError: if the string is empty (after removing whitespace
        and sign), starts with an invalid character, mixes upper- and
        lower-case letters, or contains any other non-alphanumeric
        character.  The message always quotes the string as it was passed
        in.
    """
    value_error_message = "invalid literal for hybrid-36 conversion: '%s'"

    original_input_string = input_string
    input_string = input_string.strip()

    # Manually handle negative sign.
    if input_string.startswith("-"):
        sign = -1
        input_string = input_string[1:]
    else:
        sign = 1

    if not input_string:
        raise ValueError(value_error_message % original_input_string)

    num_chars = len(input_string)
    first_char = input_string[0]

    if first_char in _hybrid36_digits:
        # Plain decimal number: no offset, digits only.
        base = 10
        reference = 0
        allowed_chars = _hybrid36_digits
    elif first_char in _hybrid36_upper_chars:
        # Shift so that "A000..." follows directly after "999...".
        base = 36
        reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
        allowed_chars = _hybrid36_upper_set
    elif first_char in _hybrid36_lower_chars:
        # int(..., 36) is case-insensitive, so lower-case strings need an
        # extra 26 * 36 ** (n - 1) on top of the upper-case shift to follow
        # directly after "ZZZ...".
        base = 36
        reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
        allowed_chars = _hybrid36_lower_set
    else:
        raise ValueError(value_error_message % original_input_string)

    # Check the validity of the input string: ASCII characters should be
    # either all uppercase or all lowercase.  Validating explicitly (also
    # for the decimal case) keeps int() from accepting things that are not
    # hybrid-36, such as "1_000" or non-ASCII digits.
    for c in input_string[1:]:
        if c not in allowed_chars:
            raise ValueError(value_error_message % original_input_string)

    # Convert with the int function.
    return sign * (int(input_string, base) + reference)
|
||||
Reference in New Issue
Block a user