377 lines
12 KiB
Python
377 lines
12 KiB
Python
"""
|
|
Set-up of a PROPKA calculation
|
|
==============================
|
|
|
|
Implements many of the main functions used to call PROPKA.
|
|
"""
|
|
|
|
import sys
|
|
import logging
|
|
import argparse
|
|
import pkg_resources
|
|
|
|
|
|
_LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
|
|
'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
|
|
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
|
|
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
|
|
|
|
|
|
def protein_precheck(conformations, names):
|
|
"""Check protein for correct number of atoms, etc.
|
|
|
|
Args:
|
|
names: conformation names to check
|
|
"""
|
|
for name in names:
|
|
atoms = conformations[name].atoms
|
|
# Group the atoms by their residue:
|
|
atoms_by_residue = {}
|
|
for atom in atoms:
|
|
if atom.element != 'H':
|
|
res_id = resid_from_atom(atom)
|
|
try:
|
|
atoms_by_residue[res_id].append(atom)
|
|
except KeyError:
|
|
atoms_by_residue[res_id] = [atom]
|
|
for res_id, res_atoms in atoms_by_residue.items():
|
|
res_name = res_atoms[0].res_name
|
|
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
|
|
# ignore ligand residues
|
|
if res_name not in EXPECTED_ATOM_NUMBERS:
|
|
continue
|
|
# check for c-terminal
|
|
if 'C-' in [a.terminal for a in res_atoms]:
|
|
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
|
|
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
|
"{res:s} in conformation {conf:s}".format(
|
|
num=len(res_atoms), res=residue_label,
|
|
conf=name))
|
|
_LOGGER.warning(str_)
|
|
continue
|
|
# check number of atoms in residue
|
|
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
|
|
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
|
"{res:s} in conformation {conf:s}".format(
|
|
num=len(res_atoms), res=residue_label,
|
|
conf=name))
|
|
_LOGGER.warning(str_)
|
|
|
|
|
|
def resid_from_atom(atom):
|
|
"""Return string with atom residue information.
|
|
|
|
Args:
|
|
atom: atom to generate string for
|
|
Returns
|
|
string
|
|
"""
|
|
return '{0:>4d} {1:s} {2:s}'.format(
|
|
atom.res_num, atom.chain_id, atom.icode)
|
|
|
|
|
|
def split_atoms_into_molecules(atoms):
|
|
"""Maps atoms into molecules.
|
|
|
|
Args:
|
|
atoms: list of atoms
|
|
Returns:
|
|
list of molecules
|
|
"""
|
|
molecules = []
|
|
while len(atoms) > 0:
|
|
initial_atom = atoms.pop()
|
|
molecules.append(make_molecule(initial_atom, atoms))
|
|
return molecules
|
|
|
|
|
|
def make_molecule(atom, atoms):
|
|
"""Make a molecule from atoms.
|
|
|
|
Args:
|
|
atom: one of the atoms
|
|
atoms: a list of the remaining atoms
|
|
Return:
|
|
list of atoms
|
|
"""
|
|
bonded_atoms = [a for a in atoms if atom in a.bonded_atoms]
|
|
res_atoms = [atom]
|
|
for bond_atom in bonded_atoms:
|
|
if bond_atom in atoms:
|
|
atoms.remove(bond_atom)
|
|
res_atoms.extend(make_molecule(bond_atom, atoms))
|
|
return res_atoms
|
|
|
|
|
|
def make_grid(min_, max_, step):
|
|
"""Make a grid across the specified tange.
|
|
|
|
TODO - figure out if this duplicates existing generators like `range` or
|
|
numpy function.
|
|
|
|
Args:
|
|
min_: minimum value of grid
|
|
max_: maximum value of grid
|
|
step: grid step size
|
|
"""
|
|
x = min_
|
|
while x <= max_:
|
|
yield x
|
|
x += step
|
|
|
|
|
|
def generate_combinations(interactions):
|
|
"""Generate combinations of interactions.
|
|
|
|
Args:
|
|
interactions: list of interactions
|
|
Returns:
|
|
list of combinations
|
|
"""
|
|
res = [[]]
|
|
for interaction in interactions:
|
|
res = make_combination(res, interaction)
|
|
res.remove([])
|
|
return res
|
|
|
|
|
|
def make_combination(combis, interaction):
|
|
"""Make a specific set of combinations.
|
|
|
|
Args:
|
|
combis: list of combinations
|
|
interaction: interaction to add to combinations
|
|
Returns:
|
|
list of combinations
|
|
"""
|
|
res = []
|
|
for combi in combis:
|
|
res.append(combi+[interaction])
|
|
res.append(combi)
|
|
return res
|
|
|
|
|
|
def parse_res_string(res_str):
|
|
"""Parse a residue string.
|
|
|
|
Args:
|
|
res_string: residue string in format "chain:resnum[inscode]"
|
|
Returns:
|
|
a tuple of (chain, resnum, inscode).
|
|
Raises:
|
|
ValueError if the input string is invalid.
|
|
"""
|
|
try:
|
|
chain, resnum_str = res_str.split(":")
|
|
except ValueError:
|
|
raise ValueError("Invalid residue string (must contain 2 "
|
|
"colon-separated values)")
|
|
try:
|
|
resnum = int(resnum_str)
|
|
except ValueError:
|
|
try:
|
|
resnum = int(resnum_str[:-1])
|
|
except ValueError:
|
|
raise ValueError("Invalid residue number (not an int)")
|
|
else:
|
|
inscode = resnum_str[-1]
|
|
else:
|
|
inscode = " "
|
|
return chain, resnum, inscode
|
|
|
|
|
|
def build_parser(parser=None):
|
|
"""Build an argument parser for PROPKA.
|
|
|
|
Args:
|
|
parser: existing parser. If this is not None, then the PROPKA parser
|
|
will be created as a subparser to this existing parser.
|
|
Otherwise, a new parser will be created.
|
|
Returns:
|
|
ArgumentParser object.
|
|
|
|
|
|
.. versionchanged:: 3.4.0
|
|
Argument `--generate-propka-input` has been removed as writing PROPKA
|
|
input files is no longer supported.
|
|
"""
|
|
if parser is not None:
|
|
group = parser.add_argument_group(title="PROPKA invocation options")
|
|
else:
|
|
parser = argparse.ArgumentParser(
|
|
description=("PROPKA predicts the pKa values of ionizable "
|
|
"groups in proteins and protein-ligand "
|
|
"complexes based in the 3D structure"),
|
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
# This is duck-typing at its finest
|
|
group = parser
|
|
group.add_argument("input_pdb", help="read data from <filename>")
|
|
group.add_argument(
|
|
"-f", "--file", action="append", dest="filenames", default=[],
|
|
help=(
|
|
"read data from <filename>, i.e. <filename> is added to arguments"
|
|
)
|
|
)
|
|
group.add_argument(
|
|
"-r", "--reference", dest="reference", default="neutral",
|
|
help=("setting which reference to use for stability calculations "
|
|
"[neutral/low-pH]"))
|
|
group.add_argument(
|
|
"-c", "--chain", action="append", dest="chains",
|
|
help=('creating the protein with only a specified chain. Specify '
|
|
'" " for chains without ID [all]'))
|
|
group.add_argument(
|
|
"-i", "--titrate_only", dest="titrate_only",
|
|
help=('Treat only the specified residues as titratable. Value should '
|
|
'be a comma-separated list of "chain:resnum" values; for '
|
|
'example: -i "A:10,A:11"'))
|
|
group.add_argument(
|
|
"-t", "--thermophile", action="append", dest="thermophiles",
|
|
help=("defining a thermophile filename; usually used in "
|
|
"'alignment-mutations'"))
|
|
group.add_argument(
|
|
"-a", "--alignment", action="append", dest="alignment",
|
|
help=("alignment file connecting <filename> and <thermophile> "
|
|
"[<thermophile>.pir]"))
|
|
group.add_argument(
|
|
"-m", "--mutation", action="append", dest="mutations",
|
|
help=("specifying mutation labels which is used to modify "
|
|
"<filename> according to, e.g. N25R/N181D"))
|
|
group.add_argument(
|
|
"-v", "--version", dest="version_label", default="Jan15",
|
|
help="specifying the sub-version of propka [Jan15/Dec19]")
|
|
group.add_argument(
|
|
"-p", "--parameters", dest="parameters",
|
|
default=pkg_resources.resource_filename(__name__, "propka.cfg"),
|
|
help="set the parameter file [{default:s}]")
|
|
try:
|
|
group.add_argument(
|
|
"--log-level",
|
|
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
|
help="logging level verbosity", default="INFO")
|
|
except argparse.ArgumentError:
|
|
# It is possible that --log-level has already been set by APBS
|
|
pass
|
|
group.add_argument(
|
|
"-o", "--pH", dest="pH", type=float, default=7.0,
|
|
help="setting pH-value used in e.g. stability calculations [7.0]")
|
|
group.add_argument(
|
|
"-w", "--window", dest="window", nargs=3, type=float,
|
|
default=(0.0, 14.0, 1.0),
|
|
help=("setting the pH-window to show e.g. stability profiles "
|
|
"[0.0, 14.0, 1.0]"))
|
|
group.add_argument(
|
|
"-g", "--grid", dest="grid", nargs=3, type=float,
|
|
default=(0.0, 14.0, 0.1),
|
|
help=("setting the pH-grid to calculate e.g. stability "
|
|
"related properties [0.0, 14.0, 0.1]"))
|
|
group.add_argument(
|
|
"--mutator", dest="mutator",
|
|
help=(
|
|
"setting approach for mutating <filename> "
|
|
"[alignment/scwrl/jackal]"
|
|
)
|
|
)
|
|
group.add_argument(
|
|
"--mutator-option", dest="mutator_options", action="append",
|
|
help="setting property for mutator [e.g. type=\"side-chain\"]")
|
|
group.add_argument(
|
|
"-d", "--display-coupled-residues", dest="display_coupled_residues",
|
|
action="store_true",
|
|
help=("Displays alternative pKa values due "
|
|
"to coupling of titratable groups"))
|
|
group.add_argument(
|
|
"-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file",
|
|
action="store_true", default=False,
|
|
help=("Reuses the ligand mol2 files allowing the user to alter "
|
|
"ligand bond orders"))
|
|
group.add_argument(
|
|
"-k", "--keep-protons", dest="keep_protons", action="store_true",
|
|
help="Keep protons in input file", default=False)
|
|
group.add_argument(
|
|
"-q", "--quiet", action="store_const", const="WARNING",
|
|
dest="log_level", help="suppress non-warning messages")
|
|
group.add_argument(
|
|
"--protonate-all", dest="protonate_all", action="store_true",
|
|
help="Protonate all atoms (will not influence pKa calculation)",
|
|
default=False)
|
|
return parser
|
|
|
|
|
|
def loadOptions(args=None):
|
|
"""
|
|
Load the arguments parser with options. Note that verbosity is set as soon
|
|
as this function is invoked.
|
|
|
|
Arguments:
|
|
args: list of arguments
|
|
Returns:
|
|
argparse namespace
|
|
"""
|
|
# loading the parser
|
|
parser = build_parser()
|
|
# parsing and returning options and arguments
|
|
options = parser.parse_args(args)
|
|
|
|
# adding specified filenames to arguments
|
|
options.filenames.append(options.input_pdb)
|
|
# Convert titrate_only string to a list of (chain, resnum) items:
|
|
if options.titrate_only is not None:
|
|
res_list = []
|
|
for res_str in options.titrate_only.split(','):
|
|
try:
|
|
chain, resnum, inscode = parse_res_string(res_str)
|
|
except ValueError:
|
|
_LOGGER.critical(
|
|
'Invalid residue string: "{0:s}"'.format(res_str))
|
|
sys.exit(1)
|
|
res_list.append((chain, resnum, inscode))
|
|
options.titrate_only = res_list
|
|
# Set the no-print variable
|
|
level = getattr(logging, options.log_level)
|
|
_LOGGER.setLevel(level)
|
|
# done!
|
|
return options
|
|
|
|
|
|
def make_tidy_atom_label(name, element):
|
|
"""Returns a 'tidier' atom label for printing to the new PDB file.
|
|
|
|
Args:
|
|
name: atom name
|
|
element: atom element
|
|
Returns:
|
|
string
|
|
"""
|
|
if len(name) > 4: # if longer than 4, just truncate the name
|
|
label = name[0:4]
|
|
elif len(name) == 4: # if length is 4, otherwise use the name as it is
|
|
label = name
|
|
else: # if less than 4 characters long, insert white space as needed
|
|
if len(element) == 1:
|
|
label = ' {0:<3s}'.format(name)
|
|
else: # The element should occupy the two first chars
|
|
label = '{0:<4s}'.format(name)
|
|
return label
|
|
|
|
|
|
def get_sorted_configurations(configuration_keys):
|
|
"""Extract and sort configurations.
|
|
|
|
Args:
|
|
configuration_keys: list of configuration keys
|
|
Returns:
|
|
list of configurations
|
|
"""
|
|
configurations = list(configuration_keys)
|
|
configurations.sort(key=configuration_compare)
|
|
return configurations
|
|
|
|
|
|
def configuration_compare(conf):
|
|
"""TODO - figure out what this function does."""
|
|
return 100*int(conf[1:-2]) + ord(conf[-1])
|