import unittest

import propka.hybrid36 as hybrid36


class Hybrid36Test(unittest.TestCase):
    """Unit tests for ``propka.hybrid36.decode``."""

    def testDecode(self):
        """Check that valid hybrid-36 strings decode to the expected integers."""
        test_values = {
            "99999": 99999,
            "A0000": 100000,
            "0": 0,
            "9": 9,
            "A": 10,
            " ZZZZY": 43770014,
            "ZZZZZ": 43770015,  # ZZZZZ - A0000 + 100000
            "a0000": 43770016,
            "zzzzz": 87440031,
            "zzzzy": 87440030,
            "99": 99,
            "A0": 100,
            "ZZ": 1035,
            "zz": 1971,
            "-99999": -99999,
            "-A0000": -100000,
            "-0": 0,
            "-9": -9,
            "-A": -10,
            "-ZZZZY": -43770014,
            "-ZZZZZ": -43770015,  # ZZZZZ - A0000 + 100000
            "-a0000": -43770016,
            "-zzzzz": -87440031,
            "-zzzzy": -87440030,
            "-99": -99,
            "-A0": -100,
            "-ZZ": -1035,
            "-zz": -1971,
            "PROPKA": 954495146,
            "A001Z": 100071,
            "B0000": 1779616,
        }

        # dict.items() exists on both Python 2 and Python 3, whereas
        # dict.iteritems() was removed in Python 3.
        for k, v in test_values.items():
            self.assertEqual(hybrid36.decode(k), v)

    def testErrors(self):
        """Check that malformed strings raise ValueError naming the input."""
        test_values = [
            "99X99",
            "X9-99",
            "XYZa",
            "",
            "-",
            "!NotOk",
        ]

        for v in test_values:
            with self.assertRaises(ValueError) as e:
                hybrid36.decode(v)
            # The offending literal should be echoed back in the message.
            self.assertIn(v, str(e.exception))
import string

# Character classes used to validate hybrid-36 literals.  A literal is
# either all decimal digits, or a letter followed by letters of the same
# case and/or digits.
_hybrid36_upper_chars = set(string.ascii_uppercase)
_hybrid36_lower_chars = set(string.ascii_lowercase)
_hybrid36_digits = set(string.digits)
_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits


def decode(input_string):
    """
    Convert an input string of a number in hybrid-36 format to an integer.

    Surrounding whitespace is ignored and a single leading "-" negates the
    result.  The remaining characters are interpreted as follows:

    * first character is a digit: the string is a plain base-10 number;
    * first character is an uppercase letter: base-36 number (uppercase
      letters and digits only), shifted so that "A0...0" of width n
      decodes to 10 ** n;
    * first character is a lowercase letter: base-36 number (lowercase
      letters and digits only), shifted so that "a0...0" of width n
      decodes to the value immediately after "ZZ...Z" of the same width.

    Raises ValueError if the string is empty (after stripping whitespace
    and the sign), starts with any other character, or contains a
    character that is not allowed for its first character's class.
    """
    value_error_message = "invalid literal for hybrid-36 conversion: '%s'"

    original_input_string = input_string
    input_string = input_string.strip()

    # Manually handle negative sign.
    if input_string.startswith("-"):
        sign = -1
        input_string = input_string[1:]
    else:
        sign = 1

    if not input_string:
        # Report the caller's original text, not the stripped remainder
        # (which is always the empty string at this point).
        raise ValueError(value_error_message % original_input_string)

    # See http://cci.lbl.gov/hybrid_36/ for documentation on the format.

    num_chars = len(input_string)
    first_char = input_string[0]

    if first_char in _hybrid36_digits:
        allowed_chars = _hybrid36_digits
        base = 10
        reference = 0
    elif first_char in _hybrid36_upper_chars:
        # int("A0..0", 36) == 10 * 36 ** (n - 1) must map to 10 ** n.
        allowed_chars = _hybrid36_upper_set
        base = 36
        reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
    elif first_char in _hybrid36_lower_chars:
        # int() with base 36 is case-insensitive, so "a0..0" also parses
        # as 10 * 36 ** (n - 1); it must map just past the uppercase range,
        # i.e. to 10 ** n + 26 * 36 ** (n - 1).
        allowed_chars = _hybrid36_lower_set
        base = 36
        reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
    else:
        raise ValueError(value_error_message % original_input_string)

    # Check the validity of the input string: ASCII characters should be
    # either all digits, or all uppercase/digits, or all lowercase/digits.
    # Validating here (rather than relying on int()) also rejects forms
    # that int() would accept, such as "1_0" or non-ASCII digits.
    if not allowed_chars.issuperset(input_string):
        raise ValueError(value_error_message % original_input_string)

    # Convert with the int function.
    return sign * (int(input_string, base) + reference)