Remove cyclic import based on I/O in pdb.py.

Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
This commit is contained in:
Nathan Baker
2020-05-30 10:00:31 -07:00
parent 397d5e10aa
commit b597a6f257
6 changed files with 173 additions and 168 deletions

View File

@@ -1,96 +1,58 @@
"""Read and parse PDB-like input files.""" """Input routines."""
import propka.lib from pkg_resources import resource_filename
from propka.lib import warning
from propka.atom import Atom from propka.atom import Atom
from propka.group import initialize_atom_group
from propka.conformation_container import ConformationContainer from propka.conformation_container import ConformationContainer
from propka.group import initialize_atom_group
EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, def open_file_for_reading(input_file):
'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, """Open file or file-like stream for reading.
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
TODO - convert this to a context manager
def read_pdb(pdb_file, parameters, molecule):
"""Parse a PDB file.
Args: Args:
pdb_file: file to read input_file: path to file or file-like object. If file-like object,
parameters: parameters to guide parsing then will attempt fseek(0).
molecule: molecular container
Returns:
list with elements:
1. list of conformations
2. list of names
""" """
conformations = {}
# read in all atoms in the file
lines = get_atom_lines_from_pdb(
pdb_file, ignore_residues=parameters.ignore_residues,
keep_protons=molecule.options.keep_protons,
chains=molecule.options.chains)
for (name, atom) in lines:
if not name in conformations.keys():
conformations[name] = ConformationContainer(
name=name, parameters=parameters, molecular_container=molecule)
conformations[name].add_atom(atom)
# make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
return [conformations, names]
def protein_precheck(conformations, names):
"""Check protein for correct number of atoms, etc.
Args:
names: conformation names to check
"""
for name in names:
atoms = conformations[name].atoms
# Group the atoms by their residue:
atoms_by_residue = {}
for atom in atoms:
if atom.element != 'H':
res_id = resid_from_atom(atom)
try: try:
atoms_by_residue[res_id].append(atom) input_file.fseek(0)
except KeyError: return input_file
atoms_by_residue[res_id] = [atom] except AttributeError:
for res_id, res_atoms in atoms_by_residue.items(): pass
res_name = res_atoms[0].res_name
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id) try:
# ignore ligand residues file_ = open(input_file, 'rt')
if res_name not in EXPECTED_ATOM_NUMBERS: except:
continue raise IOError('Cannot find file {0:s}'.format(input_file))
# check for c-terminal return file_
if 'C-' in [a.terminal for a in res_atoms]:
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
continue
# check number of atoms in residue
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
def resid_from_atom(atom): def read_parameter_file(input_file, parameters):
"""Return string with atom residue information. """Read a parameter file.
Args: Args:
atom: atom to generate string for input_file: input file to read
Returns parameters: Parameters object
string Returns:
updated Parameters object
""" """
return '{0:>4d} {1:s} {2:s}'.format( # try to locate the parameter file
atom.res_num, atom.chain_id, atom.icode) try:
ifile = resource_filename(__name__, input_file)
input_ = open_file_for_reading(ifile)
except (IOError, FileNotFoundError, ValueError):
input_ = open_file_for_reading(input_file)
for line in input_:
parameters.parse_line(line)
return parameters
def conformation_sorter(conf):
    """Return an integer sort key for a conformation name.

    The name is split into everything before its last character, which is
    parsed as an integer model number, and the last character itself, which
    is used as the label within that model.

    Args:
        conf: conformation name, e.g. '1A'
    Returns:
        100 times the model number plus the code point of the last character
    """
    model_number, label = int(conf[:-1]), conf[-1:]
    return 100 * model_number + ord(label)
def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
@@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
tags: tags of lines that include atoms tags: tags of lines that include atoms
chains: list of chains chains: list of chains
""" """
lines = propka.lib.open_file_for_reading(pdb_file).readlines() lines = open_file_for_reading(pdb_file).readlines()
nterm_residue = 'next_residue' nterm_residue = 'next_residue'
old_residue = None old_residue = None
terminal = None terminal = None
@@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule):
molecular_container=molecule) molecular_container=molecule)
conformations[name].add_atom(atom) conformations[name].add_atom(atom)
# make a sorted list of conformation names # make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) names = sorted(conformations.keys(), key=conformation_sorter)
return [conformations, names] return [conformations, names]
@@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
Yields: Yields:
conformation container, list of atoms conformation container, list of atoms
""" """
lines = propka.lib.open_file_for_reading(input_file).readlines() lines = open_file_for_reading(input_file).readlines()
conformation = '' conformation = ''
atoms = {} atoms = {}
numbers = [] numbers = []
@@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
# prepare for next conformation # prepare for next conformation
atoms = {} atoms = {}
numbers = [] numbers = []
def read_pdb(pdb_file, parameters, molecule):
    """Parse a PDB file into per-conformation containers.

    Args:
        pdb_file: file to read
        parameters: parameters to guide parsing
        molecule: molecular container
    Returns:
        list with elements:
            1. dictionary of conformation containers, keyed by
               conformation name
            2. list of conformation names, sorted with conformation_sorter
    """
    conformations = {}
    # read in all atoms in the file
    atom_lines = get_atom_lines_from_pdb(
        pdb_file, ignore_residues=parameters.ignore_residues,
        keep_protons=molecule.options.keep_protons,
        chains=molecule.options.chains)
    for (name, atom) in atom_lines:
        # create the container the first time a conformation name is seen
        if name not in conformations:
            conformations[name] = ConformationContainer(
                name=name, parameters=parameters,
                molecular_container=molecule)
        conformations[name].add_atom(atom)
    # make a sorted list of conformation names
    names = sorted(conformations, key=conformation_sorter)
    return [conformations, names]

View File

@@ -11,56 +11,63 @@ _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
_LOGGER.addHandler(_STDOUT_HANDLER) _LOGGER.addHandler(_STDOUT_HANDLER)
def open_file_for_reading(input_file): EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
"""Open file or file-like stream for reading. 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
TODO - convert this to a context manager
def protein_precheck(conformations, names):
"""Check protein for correct number of atoms, etc.
Args: Args:
input_file: path to file or file-like object. If file-like object, names: conformation names to check
then will attempt fseek(0).
""" """
for name in names:
atoms = conformations[name].atoms
# Group the atoms by their residue:
atoms_by_residue = {}
for atom in atoms:
if atom.element != 'H':
res_id = resid_from_atom(atom)
try: try:
input_file.fseek(0) atoms_by_residue[res_id].append(atom)
return input_file except KeyError:
except AttributeError: atoms_by_residue[res_id] = [atom]
pass for res_id, res_atoms in atoms_by_residue.items():
res_name = res_atoms[0].res_name
try: residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
file_ = open(input_file, 'rt') # ignore ligand residues
except: if res_name not in EXPECTED_ATOM_NUMBERS:
raise IOError('Cannot find file {0:s}'.format(input_file)) continue
return file_ # check for c-terminal
if 'C-' in [a.terminal for a in res_atoms]:
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
continue
# check number of atoms in residue
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
def open_file_for_writing(input_file): def resid_from_atom(atom):
"""Open file or file-like stream for writing. """Return string with atom residue information.
TODO - convert this to a context manager.
Args: Args:
input_file: path to file or file-like object. If file-like object, atom: atom to generate string for
then will attempt to get file mode. Returns
string
""" """
try: return '{0:>4d} {1:s} {2:s}'.format(
mode = input_file.mode atom.res_num, atom.chain_id, atom.icode)
if not ("w" in mode or "a" in mode or "+" in mode):
raise IOError("File/stream not open for writing")
return input_file
except AttributeError:
pass
try:
file_ = open(input_file, 'wt')
except FileNotFoundError:
raise Exception('Could not open {0:s}'.format(input_file))
return file_
def conformation_sorter(conf):
"""TODO - figure out what this function does."""
model = int(conf[:-1])
altloc = conf[-1:]
return model*100+ord(altloc)
def split_atoms_into_molecules(atoms): def split_atoms_into_molecules(atoms):
@@ -354,19 +361,6 @@ def configuration_compare(conf):
return 100*int(conf[1:-2]) + ord(conf[-1]) return 100*int(conf[1:-2]) + ord(conf[-1])
def write_file(filename, lines):
"""Writes a new file.
Args:
filename: name of file
lines: lines to write to file
"""
file_ = open_file_for_writing(filename)
for line in lines:
file_.write("{0:s}\n".format(line))
file_.close()
def _args_to_str(arg_list): def _args_to_str(arg_list):
"""Summarize list of arguments in string. """Summarize list of arguments in string.

View File

@@ -5,7 +5,6 @@ import sys
import propka.molecular_container import propka.molecular_container
import propka.calculations import propka.calculations
import propka.parameters import propka.parameters
import propka.pdb
from propka.output import write_mol2_for_atoms from propka.output import write_mol2_for_atoms
from propka.lib import info, warning from propka.lib import info, warning

View File

@@ -2,10 +2,11 @@
import os import os
import sys import sys
import propka.version import propka.version
from propka.pdb import read_input from propka.input import read_pdb, read_input, read_parameter_file
from propka.parameters import Parameters
from propka.output import write_input from propka.output import write_input
from propka.conformation_container import ConformationContainer from propka.conformation_container import ConformationContainer
from propka.lib import info, warning, make_grid from propka.lib import info, warning, protein_precheck, make_grid
# TODO - these are constants whose origins are a little murky # TODO - these are constants whose origins are a little murky
@@ -38,11 +39,12 @@ class Molecular_container:
self.file = os.path.split(input_file)[1] self.file = os.path.split(input_file)[1]
self.name = self.file[0:self.file.rfind('.')] self.name = self.file[0:self.file.rfind('.')]
input_file_extension = input_file[input_file.rfind('.'):] input_file_extension = input_file[input_file.rfind('.'):]
# set the version parameters = Parameters()
if options: if options:
parameters = propka.parameters.Parameters(self.options.parameters) parameters = read_parameter_file(
self.options.parameters, parameters)
else: else:
parameters = propka.parameters.Parameters('propka.cfg') parameters = read_parameter_file('propka.cfg', parameters)
try: try:
version_class = getattr(propka.version, parameters.version) version_class = getattr(propka.version, parameters.version)
self.version = version_class(parameters) self.version = version_class(parameters)
@@ -56,15 +58,15 @@ class Molecular_container:
# input is a pdb file. read in atoms and top up containers to make # input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations # sure that all atoms are present in all conformations
[self.conformations, self.conformation_names] = ( [self.conformations, self.conformation_names] = (
propka.pdb.read_pdb(input_file, self.version.parameters, self)) read_pdb(input_file, self.version.parameters, self))
if len(self.conformations) == 0: if len(self.conformations) == 0:
info('Error: The pdb file does not seems to contain any ' info('Error: The pdb file does not seems to contain any '
'molecular conformations') 'molecular conformations')
sys.exit(-1) sys.exit(-1)
self.top_up_conformations() self.top_up_conformations()
# make a structure precheck # make a structure precheck
propka.pdb.protein_precheck(self.conformations, protein_precheck(
self.conformation_names) self.conformations, self.conformation_names)
# set up atom bonding and protonation # set up atom bonding and protonation
self.version.setup_bonding_and_protonation(self) self.version.setup_bonding_and_protonation(self)
# Extract groups # Extract groups
@@ -79,9 +81,8 @@ class Molecular_container:
write_input(self, filename) write_input(self, filename)
elif input_file_extension == '.propka_input': elif input_file_extension == '.propka_input':
#input is a propka_input file #input is a propka_input file
[self.conformations, self.conformation_names] = ( [self.conformations, self.conformation_names] = read_input(
propka.pdb.read_input(input_file, self.version.parameters, input_file, self.version.parameters, self)
self))
# Extract groups - this merely sets up the groups found in the # Extract groups - this merely sets up the groups found in the
# input file # input file
self.extract_groups() self.extract_groups()

View File

@@ -1,6 +1,42 @@
"""Output routines.""" """Output routines."""
from datetime import date from datetime import date
from propka.lib import info, open_file_for_writing from propka.lib import info
def open_file_for_writing(input_file):
    """Open file or file-like stream for writing.

    TODO - convert this to a context manager.

    Args:
        input_file: path to file or file-like object. If file-like object,
            then will attempt to get file mode.
    Returns:
        input_file itself when it has a ``mode`` attribute indicating it is
        writable; otherwise a new file object opened in text-write mode.
    Raises:
        IOError: if input_file has a ``mode`` attribute that contains none
            of "w", "a" or "+".
        Exception: if input_file is treated as a path and opening it fails
            with FileNotFoundError (chained as the cause).
    """
    try:
        mode = input_file.mode
    except AttributeError:
        # No ``mode`` attribute: fall through and treat the argument as a
        # path to open.
        pass
    else:
        if not ("w" in mode or "a" in mode or "+" in mode):
            raise IOError("File/stream not open for writing")
        return input_file
    try:
        file_ = open(input_file, 'wt')
    except FileNotFoundError as err:
        # ``!s`` rather than ``:s`` so that path-like objects (which reject
        # the ``s`` format spec) still produce the intended message.
        raise Exception(
            'Could not open {0!s}'.format(input_file)) from err
    return file_
def write_file(filename, lines):
    """Write lines to a file, appending a newline to each.

    The target is obtained from open_file_for_writing and is always closed
    before returning, even if formatting or writing a line raises.

    Args:
        filename: name of file
        lines: lines to write to file
    """
    file_ = open_file_for_writing(filename)
    try:
        for line in lines:
            file_.write("{0:s}\n".format(line))
    finally:
        # close on the error path too so the handle is not leaked
        file_.close()
def print_header(): def print_header():

View File

@@ -35,7 +35,7 @@ STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
class Parameters: class Parameters:
"""PROPKA parameter class.""" """PROPKA parameter class."""
def __init__(self, parameter_file): def __init__(self):
"""Initialize parameter class. """Initialize parameter class.
Args: Args:
@@ -52,22 +52,6 @@ class Parameters:
self.CYS_CYS_exception = None self.CYS_CYS_exception = None
# These functions set up remaining data structures implicitly # These functions set up remaining data structures implicitly
self.set_up_data_structures() self.set_up_data_structures()
self.read_parameters(parameter_file)
def read_parameters(self, file_):
"""Read parameters from file.
Args:
file_: file to read
"""
# try to locate the parameters file
try:
ifile = pkg_resources.resource_filename(__name__, file_)
input_ = lib.open_file_for_reading(ifile)
except (IOError, FileNotFoundError, ValueError):
input_ = lib.open_file_for_reading(file_)
for line in input_:
self.parse_line(line)
def parse_line(self, line): def parse_line(self, line):
"""Parse parameter file line.""" """Parse parameter file line."""