Remove the cyclic import caused by the I/O routines in pdb.py.

Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
This commit is contained in:
Nathan Baker
2020-05-30 10:00:31 -07:00
parent 397d5e10aa
commit b597a6f257
6 changed files with 173 additions and 168 deletions

View File

@@ -1,96 +1,58 @@
"""Read and parse PDB-like input files."""
import propka.lib
from propka.lib import warning
"""Input routines."""
from pkg_resources import resource_filename
from propka.atom import Atom
from propka.group import initialize_atom_group
from propka.conformation_container import ConformationContainer
from propka.group import initialize_atom_group
# Expected number of non-hydrogen atoms per standard residue, keyed by the
# three-letter residue name.
EXPECTED_ATOM_NUMBERS = {
    'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8,
    'CYS': 6, 'GLY': 4, 'GLN': 9, 'GLU': 9,
    'HIS': 10, 'ILE': 8, 'LEU': 8, 'LYS': 9,
    'MET': 8, 'PHE': 11, 'PRO': 7, 'SER': 6,
    'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7,
}
def open_file_for_reading(input_file):
"""Open file or file-like stream for reading.
def read_pdb(pdb_file, parameters, molecule):
"""Parse a PDB file.
TODO - convert this to a context manager
Args:
pdb_file: file to read
parameters: parameters to guide parsing
molecule: molecular container
input_file: path to file or file-like object. If file-like object,
then will attempt fseek(0).
"""
try:
input_file.fseek(0)
return input_file
except AttributeError:
pass
try:
file_ = open(input_file, 'rt')
except:
raise IOError('Cannot find file {0:s}'.format(input_file))
return file_
def read_parameter_file(input_file, parameters):
"""Read a parameter file.
Args:
input_file: input file to read
parameters: Parameters object
Returns:
list with elements:
1. list of conformations
2. list of names
updated Parameters object
"""
conformations = {}
# read in all atoms in the file
lines = get_atom_lines_from_pdb(
pdb_file, ignore_residues=parameters.ignore_residues,
keep_protons=molecule.options.keep_protons,
chains=molecule.options.chains)
for (name, atom) in lines:
if not name in conformations.keys():
conformations[name] = ConformationContainer(
name=name, parameters=parameters, molecular_container=molecule)
conformations[name].add_atom(atom)
# make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
return [conformations, names]
# try to locate the parameter file
try:
ifile = resource_filename(__name__, input_file)
input_ = open_file_for_reading(ifile)
except (IOError, FileNotFoundError, ValueError):
input_ = open_file_for_reading(input_file)
for line in input_:
parameters.parse_line(line)
return parameters
def protein_precheck(conformations, names):
    """Warn about residues with an unexpected number of heavy atoms.

    For each named conformation the non-hydrogen atoms are grouped by
    residue and the size of every group is compared with
    EXPECTED_ATOM_NUMBERS.  A residue containing an atom whose ``terminal``
    attribute is ``'C-'`` is expected to hold one additional atom.
    Residues whose name is not in EXPECTED_ATOM_NUMBERS (ligands) are
    ignored.  Mismatches are reported through ``warning``; nothing is
    returned.

    Args:
        conformations: mapping from conformation name to an object with an
            ``atoms`` iterable
        names: conformation names to check
    """
    for name in names:
        # Group the heavy atoms by their residue
        atoms_by_residue = {}
        for atom in conformations[name].atoms:
            if atom.element != 'H':
                atoms_by_residue.setdefault(
                    resid_from_atom(atom), []).append(atom)
        for res_id, res_atoms in atoms_by_residue.items():
            res_name = res_atoms[0].res_name
            # ignore ligand residues
            if res_name not in EXPECTED_ATOM_NUMBERS:
                continue
            expected = EXPECTED_ATOM_NUMBERS[res_name]
            # a C-terminal residue carries one atom more than the table says
            if any(a.terminal == 'C-' for a in res_atoms):
                expected += 1
            if len(res_atoms) != expected:
                residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
                str_ = ("Unexpected number ({num:d}) of atoms in residue "
                        "{res:s} in conformation {conf:s}".format(
                            num=len(res_atoms), res=residue_label,
                            conf=name))
                warning(str_)
def resid_from_atom(atom):
    """Build the residue identifier string for an atom.

    Args:
        atom: object providing ``res_num`` (integer), ``chain_id`` and
            ``icode`` (strings)
    Returns:
        string holding the residue number right-aligned in four
        characters, followed by the chain id and the icode, each separated
        by a single space
    """
    fields = (atom.res_num, atom.chain_id, atom.icode)
    return '{0:>4d} {1:s} {2:s}'.format(*fields)
def conformation_sorter(conf):
    """Compute an integer sort key for a conformation name.

    The name is split into a leading integer (everything but the last
    character) and the final character.  Presumably these are the model
    number and the alternate-location label, going by the original local
    names - confirm against the code that builds conformation names.

    Args:
        conf: conformation name, e.g. ``'1A'``
    Returns:
        100 times the leading integer plus the code point of the final
        character
    """
    number, label = conf[:-1], conf[-1:]
    return 100 * int(number) + ord(label)
def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
@@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
tags: tags of lines that include atoms
chains: list of chains
"""
lines = propka.lib.open_file_for_reading(pdb_file).readlines()
lines = open_file_for_reading(pdb_file).readlines()
nterm_residue = 'next_residue'
old_residue = None
terminal = None
@@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule):
molecular_container=molecule)
conformations[name].add_atom(atom)
# make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
names = sorted(conformations.keys(), key=conformation_sorter)
return [conformations, names]
@@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
Yields:
conformation container, list of atoms
"""
lines = propka.lib.open_file_for_reading(input_file).readlines()
lines = open_file_for_reading(input_file).readlines()
conformation = ''
atoms = {}
numbers = []
@@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
# prepare for next conformation
atoms = {}
numbers = []
def read_pdb(pdb_file, parameters, molecule):
    """Parse a PDB file.

    Args:
        pdb_file: file to read
        parameters: parameters to guide parsing
        molecule: molecular container
    Returns:
        list with elements:
            1. dictionary mapping each conformation name to its
               ConformationContainer
            2. list of conformation names, ordered by conformation_sorter
    """
    conformations = {}
    # read in all atoms in the file
    atom_lines = get_atom_lines_from_pdb(
        pdb_file, ignore_residues=parameters.ignore_residues,
        keep_protons=molecule.options.keep_protons,
        chains=molecule.options.chains)
    for name, atom in atom_lines:
        # create the container the first time a conformation name is seen
        if name not in conformations:
            conformations[name] = ConformationContainer(
                name=name, parameters=parameters, molecular_container=molecule)
        conformations[name].add_atom(atom)
    # make a sorted list of conformation names
    names = sorted(conformations, key=conformation_sorter)
    return [conformations, names]

View File

@@ -11,56 +11,63 @@ _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
_LOGGER.addHandler(_STDOUT_HANDLER)
def open_file_for_reading(input_file):
"""Open file or file-like stream for reading.
# Expected number of non-hydrogen atoms per standard residue, keyed by the
# three-letter residue name.
EXPECTED_ATOM_NUMBERS = {
    'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8,
    'CYS': 6, 'GLY': 4, 'GLN': 9, 'GLU': 9,
    'HIS': 10, 'ILE': 8, 'LEU': 8, 'LYS': 9,
    'MET': 8, 'PHE': 11, 'PRO': 7, 'SER': 6,
    'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7,
}
TODO - convert this to a context manager
def protein_precheck(conformations, names):
"""Check protein for correct number of atoms, etc.
Args:
input_file: path to file or file-like object. If file-like object,
then will attempt fseek(0).
names: conformation names to check
"""
try:
input_file.fseek(0)
return input_file
except AttributeError:
pass
try:
file_ = open(input_file, 'rt')
except:
raise IOError('Cannot find file {0:s}'.format(input_file))
return file_
for name in names:
atoms = conformations[name].atoms
# Group the atoms by their residue:
atoms_by_residue = {}
for atom in atoms:
if atom.element != 'H':
res_id = resid_from_atom(atom)
try:
atoms_by_residue[res_id].append(atom)
except KeyError:
atoms_by_residue[res_id] = [atom]
for res_id, res_atoms in atoms_by_residue.items():
res_name = res_atoms[0].res_name
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
# ignore ligand residues
if res_name not in EXPECTED_ATOM_NUMBERS:
continue
# check for c-terminal
if 'C-' in [a.terminal for a in res_atoms]:
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
continue
# check number of atoms in residue
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
def open_file_for_writing(input_file):
"""Open file or file-like stream for writing.
TODO - convert this to a context manager.
def resid_from_atom(atom):
"""Return string with atom residue information.
Args:
input_file: path to file or file-like object. If file-like object,
then will attempt to get file mode.
atom: atom to generate string for
Returns
string
"""
try:
mode = input_file.mode
if not ("w" in mode or "a" in mode or "+" in mode):
raise IOError("File/stream not open for writing")
return input_file
except AttributeError:
pass
try:
file_ = open(input_file, 'wt')
except FileNotFoundError:
raise Exception('Could not open {0:s}'.format(input_file))
return file_
def conformation_sorter(conf):
    """Compute an integer sort key for a conformation name.

    The name is split into a leading integer (everything but the last
    character) and the final character.  Presumably these are the model
    number and the alternate-location label, going by the original local
    names - confirm against the code that builds conformation names.

    Args:
        conf: conformation name, e.g. ``'1A'``
    Returns:
        100 times the leading integer plus the code point of the final
        character
    """
    number, label = conf[:-1], conf[-1:]
    return 100 * int(number) + ord(label)
return '{0:>4d} {1:s} {2:s}'.format(
atom.res_num, atom.chain_id, atom.icode)
def split_atoms_into_molecules(atoms):
@@ -354,19 +361,6 @@ def configuration_compare(conf):
return 100*int(conf[1:-2]) + ord(conf[-1])
def write_file(filename, lines):
    """Write lines to a file, one per output line.

    Args:
        filename: name of file, or a file-like object accepted by
            open_file_for_writing
        lines: iterable of strings to write; a newline is appended to each

    The stream is closed once writing finishes, also when a write fails
    part-way through.
    """
    file_ = open_file_for_writing(filename)
    try:
        for line in lines:
            file_.write("{0:s}\n".format(line))
    finally:
        # release the handle even if formatting or writing raised
        file_.close()
def _args_to_str(arg_list):
"""Summarize list of arguments in string.

View File

@@ -5,7 +5,6 @@ import sys
import propka.molecular_container
import propka.calculations
import propka.parameters
import propka.pdb
from propka.output import write_mol2_for_atoms
from propka.lib import info, warning

View File

@@ -2,10 +2,11 @@
import os
import sys
import propka.version
from propka.pdb import read_input
from propka.input import read_pdb, read_input, read_parameter_file
from propka.parameters import Parameters
from propka.output import write_input
from propka.conformation_container import ConformationContainer
from propka.lib import info, warning, make_grid
from propka.lib import info, warning, protein_precheck, make_grid
# TODO - these are constants whose origins are a little murky
@@ -38,11 +39,12 @@ class Molecular_container:
self.file = os.path.split(input_file)[1]
self.name = self.file[0:self.file.rfind('.')]
input_file_extension = input_file[input_file.rfind('.'):]
# set the version
parameters = Parameters()
if options:
parameters = propka.parameters.Parameters(self.options.parameters)
parameters = read_parameter_file(
self.options.parameters, parameters)
else:
parameters = propka.parameters.Parameters('propka.cfg')
parameters = read_parameter_file('propka.cfg', parameters)
try:
version_class = getattr(propka.version, parameters.version)
self.version = version_class(parameters)
@@ -56,15 +58,15 @@ class Molecular_container:
# input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations
[self.conformations, self.conformation_names] = (
propka.pdb.read_pdb(input_file, self.version.parameters, self))
read_pdb(input_file, self.version.parameters, self))
if len(self.conformations) == 0:
info('Error: The pdb file does not seems to contain any '
'molecular conformations')
sys.exit(-1)
self.top_up_conformations()
# make a structure precheck
propka.pdb.protein_precheck(self.conformations,
self.conformation_names)
protein_precheck(
self.conformations, self.conformation_names)
# set up atom bonding and protonation
self.version.setup_bonding_and_protonation(self)
# Extract groups
@@ -79,9 +81,8 @@ class Molecular_container:
write_input(self, filename)
elif input_file_extension == '.propka_input':
#input is a propka_input file
[self.conformations, self.conformation_names] = (
propka.pdb.read_input(input_file, self.version.parameters,
self))
[self.conformations, self.conformation_names] = read_input(
input_file, self.version.parameters, self)
# Extract groups - this merely sets up the groups found in the
# input file
self.extract_groups()

View File

@@ -1,6 +1,42 @@
"""Output routines."""
from datetime import date
from propka.lib import info, open_file_for_writing
from propka.lib import info
def open_file_for_writing(input_file):
    """Open file or file-like stream for writing.

    TODO - convert this to a context manager.

    Args:
        input_file: path to file or file-like object. If the object has a
            ``mode`` attribute it is treated as an open stream: the mode is
            checked and the object itself is returned.  Anything else is
            passed to ``open`` in text-write mode.
    Returns:
        file object open for writing
    Raises:
        IOError: if a stream was given that is not open in a writable mode
        Exception: if the path cannot be opened because it does not exist
            (e.g. a missing directory)
    """
    try:
        mode = input_file.mode
    except AttributeError:
        # no ``mode`` attribute: treat the argument as a path
        pass
    else:
        # 'x' (exclusive creation) is a writable mode as well
        if not any(flag in mode for flag in "wax+"):
            raise IOError("File/stream not open for writing")
        return input_file
    try:
        return open(input_file, 'wt')
    except FileNotFoundError as err:
        # ``!s`` also formats path-like objects, which reject the ``s`` spec
        raise Exception('Could not open {0!s}'.format(input_file)) from err
def write_file(filename, lines):
    """Write lines to a file, one per output line.

    Args:
        filename: name of file, or a file-like object accepted by
            open_file_for_writing
        lines: iterable of strings to write; a newline is appended to each

    The stream is closed once writing finishes, also when a write fails
    part-way through.
    """
    file_ = open_file_for_writing(filename)
    try:
        for line in lines:
            file_.write("{0:s}\n".format(line))
    finally:
        # release the handle even if formatting or writing raised
        file_.close()
def print_header():

View File

@@ -35,7 +35,7 @@ STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
class Parameters:
"""PROPKA parameter class."""
def __init__(self, parameter_file):
def __init__(self):
"""Initialize parameter class.
Args:
@@ -52,22 +52,6 @@ class Parameters:
self.CYS_CYS_exception = None
# These functions set up remaining data structures implicitly
self.set_up_data_structures()
self.read_parameters(parameter_file)
def read_parameters(self, file_):
    """Read parameters from file.

    The name is first resolved relative to this package with
    ``pkg_resources.resource_filename``; if that lookup or the open fails,
    ``file_`` is opened as given.  Every line is passed to
    ``self.parse_line`` and the stream is closed afterwards.

    Args:
        file_: file to read
    """
    # try to locate the parameters file
    try:
        ifile = pkg_resources.resource_filename(__name__, file_)
        input_ = lib.open_file_for_reading(ifile)
    except (IOError, FileNotFoundError, ValueError):
        input_ = lib.open_file_for_reading(file_)
    try:
        for line in input_:
            self.parse_line(line)
    finally:
        # the handle was previously left open after parsing
        input_.close()
def parse_line(self, line):
"""Parse parameter file line."""