Files
propka/propka/hybrid36.py
2020-05-24 11:21:55 -07:00

62 lines
1.9 KiB
Python

"""Provides an alternative PDB format that can transparently encode larger atom numbers.
http://cci.lbl.gov/hybrid_36/
"""
import string
# Character classes of the hybrid-36 alphabet.  A hybrid-36 field is either
# plain decimal, or base-36 written entirely with upper-case letters, or
# base-36 written entirely with lower-case letters.
_HYBRID36_UPPER_CHARS = set(string.ascii_uppercase)
_HYBRID36_LOWER_CHARS = set(string.ascii_lowercase)
_HYBRID36_DIGITS = set(string.digits)
_HYBRID36_UPPER_SET = _HYBRID36_UPPER_CHARS | _HYBRID36_DIGITS
_HYBRID36_LOWER_SET = _HYBRID36_LOWER_CHARS | _HYBRID36_DIGITS


def decode(input_string):
    """Convert an input string of a number in hybrid-36 format to an integer.

    Surrounding whitespace is ignored and a single leading ``-`` negates the
    decoded value.  The first remaining character selects the encoding:

    * an ASCII digit: the field is a plain base-10 number;
    * an upper-case letter: base-36 using upper-case letters only;
    * a lower-case letter: base-36 using lower-case letters only.

    Args:
        input_string: input string
    Returns:
        integer
    Raises:
        ValueError: if the string is empty (or only a sign), starts with a
            character outside the hybrid-36 alphabet, or mixes characters
            that do not belong to the encoding selected by its first
            character.
    """
    field = input_string.strip()
    # Always report the caller's original text, not the stripped/unsigned
    # remainder, so the message identifies the offending input.
    error_message = (
        "invalid literal for hybrid-36 conversion: '%s'" % input_string)

    # Manually handle negative sign.
    sign = 1
    if field.startswith("-"):
        sign = -1
        field = field[1:]
    if not field:
        raise ValueError(error_message)

    # See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
    num_chars = len(field)
    first_char = field[0]
    if first_char in _HYBRID36_DIGITS:
        allowed_chars = _HYBRID36_DIGITS
        base = 10
        offset = 0
    elif first_char in _HYBRID36_UPPER_CHARS:
        # Upper-case values continue right after the largest decimal value
        # of this width: "A00..0" must decode to 10 ** num_chars.
        allowed_chars = _HYBRID36_UPPER_SET
        base = 36
        offset = 10 ** num_chars - 10 * 36 ** (num_chars - 1)
    elif first_char in _HYBRID36_LOWER_CHARS:
        # Lower-case values continue right after the largest upper-case
        # value; int(..., 36) is case-insensitive, hence the extra shift.
        allowed_chars = _HYBRID36_LOWER_SET
        base = 36
        offset = 16 * 36 ** (num_chars - 1) + 10 ** num_chars
    else:
        raise ValueError(error_message)

    # Validate explicitly instead of relying on int(): int() would accept
    # spellings that are not hybrid-36 (e.g. "1_0", mixed-case base-36).
    if any(char not in allowed_chars for char in field[1:]):
        raise ValueError(error_message)

    return sign * (int(field, base) + offset)