handle structures with 100,000 or more atoms
- The PDB format reserves a fixed 5-character field for the atom serial number, so plain decimal numbering stops at 99,999; the hybrid-36 encoding reuses the same five characters to number up to 87,440,031 atoms.
This commit is contained in:
57
Tests/hybrid36.py
Normal file
57
Tests/hybrid36.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import unittest
|
||||||
|
|
||||||
|
import propka.hybrid36 as hybrid36
|
||||||
|
|
||||||
|
class Hybrid36Test(unittest.TestCase):
    """Unit tests for ``hybrid36.decode``."""

    def testDecode(self):
        """Valid hybrid-36 strings decode to the expected integers."""
        test_values = {
            "99999": 99999,
            "A0000": 100000,
            "0": 0,
            "9": 9,
            "A": 10,
            " ZZZZY": 43770014,
            "ZZZZZ": 43770015,  # ZZZZZ - A0000 + 100000
            "a0000": 43770016,
            "zzzzz": 87440031,
            "zzzzy": 87440030,
            "99": 99,
            "A0": 100,
            "ZZ": 1035,
            "zz": 1971,
            "-99999": -99999,
            "-A0000": -100000,
            "-0": 0,
            "-9": -9,
            "-A": -10,
            "-ZZZZY": -43770014,
            "-ZZZZZ": -43770015,  # ZZZZZ - A0000 + 100000
            "-a0000": -43770016,
            "-zzzzz": -87440031,
            "-zzzzy": -87440030,
            "-99": -99,
            "-A0": -100,
            "-ZZ": -1035,
            "-zz": -1971,
            "PROPKA": 954495146,
            "A001Z": 100071,
            "B0000": 1779616,
        }

        # dict.items() is available on both Python 2 and Python 3, whereas
        # dict.iteritems() only exists on Python 2.
        for k, v in test_values.items():
            # Name the input in the failure message so a broken case can be
            # identified without re-running under a debugger.
            self.assertEqual(hybrid36.decode(k), v,
                             "unexpected decoding of %r" % k)

    def testErrors(self):
        """Malformed strings raise ValueError that quotes the bad input."""
        test_values = [
            "99X99",
            "X9-99",
            "XYZa",
            "",
            "-",
            "!NotOk",
        ]

        for v in test_values:
            with self.assertRaises(ValueError) as e:
                hybrid36.decode(v)
            # The error text is expected to contain the rejected input.
            self.assertTrue(v in str(e.exception),
                            "%r missing from error message" % v)
|
||||||
@@ -4,6 +4,7 @@ from __future__ import print_function
|
|||||||
|
|
||||||
import string, propka.lib, propka.group
|
import string, propka.lib, propka.group
|
||||||
|
|
||||||
|
from . import hybrid36
|
||||||
|
|
||||||
class Atom:
|
class Atom:
|
||||||
"""
|
"""
|
||||||
@@ -68,7 +69,7 @@ class Atom:
|
|||||||
|
|
||||||
if line:
|
if line:
|
||||||
self.name = line[12:16].strip()
|
self.name = line[12:16].strip()
|
||||||
self.numb = int( line[ 6:11].strip() )
|
self.numb = int( hybrid36.decode(line[ 6:11]) )
|
||||||
self.x = float( line[30:38].strip() )
|
self.x = float( line[30:38].strip() )
|
||||||
self.y = float( line[38:46].strip() )
|
self.y = float( line[38:46].strip() )
|
||||||
self.z = float( line[46:54].strip() )
|
self.z = float( line[46:54].strip() )
|
||||||
|
|||||||
52
propka/hybrid36.py
Normal file
52
propka/hybrid36.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import string
|
||||||
|
|
||||||
|
# Character classes used to validate hybrid-36 strings.
_hybrid36_upper_chars = set(string.ascii_uppercase)
_hybrid36_lower_chars = set(string.ascii_lowercase)
_hybrid36_digits = set(string.digits)
_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits


def decode(input_string):
    """
    Convert an input string of a number in hybrid-36 format to an integer.

    Surrounding whitespace is ignored and a single leading "-" negates the
    result. The first remaining character selects the encoding:

    * an ASCII digit: the string is a plain base-10 number;
    * an uppercase letter: base-36 using digits and uppercase letters,
      continuing directly after the largest decimal value of that width;
    * a lowercase letter: base-36 using digits and lowercase letters,
      continuing directly after the largest uppercase value of that width.

    See http://cci.lbl.gov/hybrid_36/ for documentation on the format.

    :param input_string: text to decode, e.g. ``"A0000"``.
    :returns: the decoded integer, e.g. ``100000``.
    :raises ValueError: if the string is empty (after removing whitespace
        and sign), starts with an unsupported character, or mixes
        character classes. The message quotes the original input.
    """
    value_error_message = "invalid literal for hybrid-36 conversion: '%s'"

    original_input_string = input_string
    input_string = input_string.strip()

    # Manually handle negative sign.
    if input_string.startswith("-"):
        sign = -1
        input_string = input_string[1:]
    else:
        sign = 1

    if not input_string:
        # Report the text the caller passed in, not the stripped remainder.
        raise ValueError(value_error_message % original_input_string)

    num_chars = len(input_string)
    first_char = input_string[0]

    if first_char in _hybrid36_digits:
        # Plain decimal number: no offset is needed.
        base = 10
        reference = 0
        allowed_chars = _hybrid36_digits
    elif first_char in _hybrid36_upper_chars:
        # "A00..0" in base 36 equals 10 * 36**(n-1); it must map to 10**n,
        # the first value that no longer fits in n decimal digits.
        base = 36
        reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
        allowed_chars = _hybrid36_upper_set
    elif first_char in _hybrid36_lower_chars:
        # The lowercase range starts right after the uppercase range, which
        # holds 26 * 36**(n-1) values; int(..., 36) is case-insensitive, so
        # the net shift relative to the uppercase case is 16 * 36**(n-1).
        base = 36
        reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
        allowed_chars = _hybrid36_lower_set
    else:
        raise ValueError(value_error_message % original_input_string)

    # Check the validity of the input string: after the first character only
    # digits (decimal case) or digits plus letters of the same case are
    # allowed. Validating here also keeps int() from accepting forms such as
    # underscore separators or non-ASCII digits.
    if any(c not in allowed_chars for c in input_string[1:]):
        raise ValueError(value_error_message % original_input_string)

    # Convert with the int function.
    return sign * (int(input_string, base) + reference)
|
||||||
Reference in New Issue
Block a user