Remove cyclic import based on I/O in pdb.py.
Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
This commit is contained in:
@@ -1,96 +1,58 @@
|
||||
"""Read and parse PDB-like input files."""
|
||||
import propka.lib
|
||||
from propka.lib import warning
|
||||
"""Input routines."""
|
||||
from pkg_resources import resource_filename
|
||||
from propka.atom import Atom
|
||||
from propka.group import initialize_atom_group
|
||||
from propka.conformation_container import ConformationContainer
|
||||
from propka.group import initialize_atom_group
|
||||
|
||||
|
||||
EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
|
||||
'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
|
||||
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
|
||||
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
|
||||
def open_file_for_reading(input_file):
|
||||
"""Open file or file-like stream for reading.
|
||||
|
||||
|
||||
def read_pdb(pdb_file, parameters, molecule):
|
||||
"""Parse a PDB file.
|
||||
TODO - convert this to a context manager
|
||||
|
||||
Args:
|
||||
pdb_file: file to read
|
||||
parameters: parameters to guide parsing
|
||||
molecule: molecular container
|
||||
input_file: path to file or file-like object. If file-like object,
|
||||
then will attempt fseek(0).
|
||||
"""
|
||||
try:
|
||||
input_file.fseek(0)
|
||||
return input_file
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
file_ = open(input_file, 'rt')
|
||||
except:
|
||||
raise IOError('Cannot find file {0:s}'.format(input_file))
|
||||
return file_
|
||||
|
||||
|
||||
def read_parameter_file(input_file, parameters):
|
||||
"""Read a parameter file.
|
||||
|
||||
Args:
|
||||
input_file: input file to read
|
||||
parameters: Parameters object
|
||||
Returns:
|
||||
list with elements:
|
||||
1. list of conformations
|
||||
2. list of names
|
||||
updated Parameters object
|
||||
"""
|
||||
conformations = {}
|
||||
# read in all atoms in the file
|
||||
lines = get_atom_lines_from_pdb(
|
||||
pdb_file, ignore_residues=parameters.ignore_residues,
|
||||
keep_protons=molecule.options.keep_protons,
|
||||
chains=molecule.options.chains)
|
||||
for (name, atom) in lines:
|
||||
if not name in conformations.keys():
|
||||
conformations[name] = ConformationContainer(
|
||||
name=name, parameters=parameters, molecular_container=molecule)
|
||||
conformations[name].add_atom(atom)
|
||||
# make a sorted list of conformation names
|
||||
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
|
||||
return [conformations, names]
|
||||
# try to locate the parameter file
|
||||
try:
|
||||
ifile = resource_filename(__name__, input_file)
|
||||
input_ = open_file_for_reading(ifile)
|
||||
except (IOError, FileNotFoundError, ValueError):
|
||||
input_ = open_file_for_reading(input_file)
|
||||
for line in input_:
|
||||
parameters.parse_line(line)
|
||||
return parameters
|
||||
|
||||
|
||||
def protein_precheck(conformations, names):
|
||||
"""Check protein for correct number of atoms, etc.
|
||||
|
||||
Args:
|
||||
names: conformation names to check
|
||||
"""
|
||||
for name in names:
|
||||
atoms = conformations[name].atoms
|
||||
# Group the atoms by their residue:
|
||||
atoms_by_residue = {}
|
||||
for atom in atoms:
|
||||
if atom.element != 'H':
|
||||
res_id = resid_from_atom(atom)
|
||||
try:
|
||||
atoms_by_residue[res_id].append(atom)
|
||||
except KeyError:
|
||||
atoms_by_residue[res_id] = [atom]
|
||||
for res_id, res_atoms in atoms_by_residue.items():
|
||||
res_name = res_atoms[0].res_name
|
||||
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
|
||||
# ignore ligand residues
|
||||
if res_name not in EXPECTED_ATOM_NUMBERS:
|
||||
continue
|
||||
# check for c-terminal
|
||||
if 'C-' in [a.terminal for a in res_atoms]:
|
||||
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
|
||||
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
||||
"{res:s} in conformation {conf:s}".format(
|
||||
num=len(res_atoms), res=residue_label,
|
||||
conf=name))
|
||||
warning(str_)
|
||||
continue
|
||||
# check number of atoms in residue
|
||||
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
|
||||
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
||||
"{res:s} in conformation {conf:s}".format(
|
||||
num=len(res_atoms), res=residue_label,
|
||||
conf=name))
|
||||
warning(str_)
|
||||
|
||||
|
||||
def resid_from_atom(atom):
|
||||
"""Return string with atom residue information.
|
||||
|
||||
Args:
|
||||
atom: atom to generate string for
|
||||
Returns
|
||||
string
|
||||
"""
|
||||
return '{0:>4d} {1:s} {2:s}'.format(
|
||||
atom.res_num, atom.chain_id, atom.icode)
|
||||
def conformation_sorter(conf):
|
||||
"""TODO - figure out what this function does."""
|
||||
model = int(conf[:-1])
|
||||
altloc = conf[-1:]
|
||||
return model*100+ord(altloc)
|
||||
|
||||
|
||||
def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
|
||||
@@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
|
||||
tags: tags of lines that include atoms
|
||||
chains: list of chains
|
||||
"""
|
||||
lines = propka.lib.open_file_for_reading(pdb_file).readlines()
|
||||
lines = open_file_for_reading(pdb_file).readlines()
|
||||
nterm_residue = 'next_residue'
|
||||
old_residue = None
|
||||
terminal = None
|
||||
@@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule):
|
||||
molecular_container=molecule)
|
||||
conformations[name].add_atom(atom)
|
||||
# make a sorted list of conformation names
|
||||
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
|
||||
names = sorted(conformations.keys(), key=conformation_sorter)
|
||||
return [conformations, names]
|
||||
|
||||
|
||||
@@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
|
||||
Yields:
|
||||
conformation container, list of atoms
|
||||
"""
|
||||
lines = propka.lib.open_file_for_reading(input_file).readlines()
|
||||
lines = open_file_for_reading(input_file).readlines()
|
||||
conformation = ''
|
||||
atoms = {}
|
||||
numbers = []
|
||||
@@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
|
||||
# prepare for next conformation
|
||||
atoms = {}
|
||||
numbers = []
|
||||
|
||||
def read_pdb(pdb_file, parameters, molecule):
|
||||
"""Parse a PDB file.
|
||||
|
||||
Args:
|
||||
pdb_file: file to read
|
||||
parameters: parameters to guide parsing
|
||||
molecule: molecular container
|
||||
Returns:
|
||||
list with elements:
|
||||
1. list of conformations
|
||||
2. list of names
|
||||
"""
|
||||
conformations = {}
|
||||
# read in all atoms in the file
|
||||
lines = get_atom_lines_from_pdb(
|
||||
pdb_file, ignore_residues=parameters.ignore_residues,
|
||||
keep_protons=molecule.options.keep_protons,
|
||||
chains=molecule.options.chains)
|
||||
for (name, atom) in lines:
|
||||
if not name in conformations.keys():
|
||||
conformations[name] = ConformationContainer(
|
||||
name=name, parameters=parameters, molecular_container=molecule)
|
||||
conformations[name].add_atom(atom)
|
||||
# make a sorted list of conformation names
|
||||
names = sorted(conformations.keys(), key=conformation_sorter)
|
||||
return [conformations, names]
|
||||
|
||||
|
||||
102
propka/lib.py
102
propka/lib.py
@@ -11,56 +11,63 @@ _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
|
||||
_LOGGER.addHandler(_STDOUT_HANDLER)
|
||||
|
||||
|
||||
def open_file_for_reading(input_file):
|
||||
"""Open file or file-like stream for reading.
|
||||
EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
|
||||
'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
|
||||
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
|
||||
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
|
||||
|
||||
TODO - convert this to a context manager
|
||||
|
||||
def protein_precheck(conformations, names):
|
||||
"""Check protein for correct number of atoms, etc.
|
||||
|
||||
Args:
|
||||
input_file: path to file or file-like object. If file-like object,
|
||||
then will attempt fseek(0).
|
||||
names: conformation names to check
|
||||
"""
|
||||
try:
|
||||
input_file.fseek(0)
|
||||
return input_file
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
file_ = open(input_file, 'rt')
|
||||
except:
|
||||
raise IOError('Cannot find file {0:s}'.format(input_file))
|
||||
return file_
|
||||
for name in names:
|
||||
atoms = conformations[name].atoms
|
||||
# Group the atoms by their residue:
|
||||
atoms_by_residue = {}
|
||||
for atom in atoms:
|
||||
if atom.element != 'H':
|
||||
res_id = resid_from_atom(atom)
|
||||
try:
|
||||
atoms_by_residue[res_id].append(atom)
|
||||
except KeyError:
|
||||
atoms_by_residue[res_id] = [atom]
|
||||
for res_id, res_atoms in atoms_by_residue.items():
|
||||
res_name = res_atoms[0].res_name
|
||||
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
|
||||
# ignore ligand residues
|
||||
if res_name not in EXPECTED_ATOM_NUMBERS:
|
||||
continue
|
||||
# check for c-terminal
|
||||
if 'C-' in [a.terminal for a in res_atoms]:
|
||||
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
|
||||
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
||||
"{res:s} in conformation {conf:s}".format(
|
||||
num=len(res_atoms), res=residue_label,
|
||||
conf=name))
|
||||
warning(str_)
|
||||
continue
|
||||
# check number of atoms in residue
|
||||
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
|
||||
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
||||
"{res:s} in conformation {conf:s}".format(
|
||||
num=len(res_atoms), res=residue_label,
|
||||
conf=name))
|
||||
warning(str_)
|
||||
|
||||
|
||||
def open_file_for_writing(input_file):
|
||||
"""Open file or file-like stream for writing.
|
||||
|
||||
TODO - convert this to a context manager.
|
||||
def resid_from_atom(atom):
|
||||
"""Return string with atom residue information.
|
||||
|
||||
Args:
|
||||
input_file: path to file or file-like object. If file-like object,
|
||||
then will attempt to get file mode.
|
||||
atom: atom to generate string for
|
||||
Returns
|
||||
string
|
||||
"""
|
||||
try:
|
||||
mode = input_file.mode
|
||||
if not ("w" in mode or "a" in mode or "+" in mode):
|
||||
raise IOError("File/stream not open for writing")
|
||||
return input_file
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
file_ = open(input_file, 'wt')
|
||||
except FileNotFoundError:
|
||||
raise Exception('Could not open {0:s}'.format(input_file))
|
||||
return file_
|
||||
|
||||
|
||||
def conformation_sorter(conf):
|
||||
"""TODO - figure out what this function does."""
|
||||
model = int(conf[:-1])
|
||||
altloc = conf[-1:]
|
||||
return model*100+ord(altloc)
|
||||
return '{0:>4d} {1:s} {2:s}'.format(
|
||||
atom.res_num, atom.chain_id, atom.icode)
|
||||
|
||||
|
||||
def split_atoms_into_molecules(atoms):
|
||||
@@ -354,19 +361,6 @@ def configuration_compare(conf):
|
||||
return 100*int(conf[1:-2]) + ord(conf[-1])
|
||||
|
||||
|
||||
def write_file(filename, lines):
|
||||
"""Writes a new file.
|
||||
|
||||
Args:
|
||||
filename: name of file
|
||||
lines: lines to write to file
|
||||
"""
|
||||
file_ = open_file_for_writing(filename)
|
||||
for line in lines:
|
||||
file_.write("{0:s}\n".format(line))
|
||||
file_.close()
|
||||
|
||||
|
||||
def _args_to_str(arg_list):
|
||||
"""Summarize list of arguments in string.
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ import sys
|
||||
import propka.molecular_container
|
||||
import propka.calculations
|
||||
import propka.parameters
|
||||
import propka.pdb
|
||||
from propka.output import write_mol2_for_atoms
|
||||
from propka.lib import info, warning
|
||||
|
||||
|
||||
@@ -2,10 +2,11 @@
|
||||
import os
|
||||
import sys
|
||||
import propka.version
|
||||
from propka.pdb import read_input
|
||||
from propka.input import read_pdb, read_input, read_parameter_file
|
||||
from propka.parameters import Parameters
|
||||
from propka.output import write_input
|
||||
from propka.conformation_container import ConformationContainer
|
||||
from propka.lib import info, warning, make_grid
|
||||
from propka.lib import info, warning, protein_precheck, make_grid
|
||||
|
||||
|
||||
# TODO - these are constants whose origins are a little murky
|
||||
@@ -38,11 +39,12 @@ class Molecular_container:
|
||||
self.file = os.path.split(input_file)[1]
|
||||
self.name = self.file[0:self.file.rfind('.')]
|
||||
input_file_extension = input_file[input_file.rfind('.'):]
|
||||
# set the version
|
||||
parameters = Parameters()
|
||||
if options:
|
||||
parameters = propka.parameters.Parameters(self.options.parameters)
|
||||
parameters = read_parameter_file(
|
||||
self.options.parameters, parameters)
|
||||
else:
|
||||
parameters = propka.parameters.Parameters('propka.cfg')
|
||||
parameters = read_parameter_file('propka.cfg', parameters)
|
||||
try:
|
||||
version_class = getattr(propka.version, parameters.version)
|
||||
self.version = version_class(parameters)
|
||||
@@ -56,15 +58,15 @@ class Molecular_container:
|
||||
# input is a pdb file. read in atoms and top up containers to make
|
||||
# sure that all atoms are present in all conformations
|
||||
[self.conformations, self.conformation_names] = (
|
||||
propka.pdb.read_pdb(input_file, self.version.parameters, self))
|
||||
read_pdb(input_file, self.version.parameters, self))
|
||||
if len(self.conformations) == 0:
|
||||
info('Error: The pdb file does not seems to contain any '
|
||||
'molecular conformations')
|
||||
sys.exit(-1)
|
||||
self.top_up_conformations()
|
||||
# make a structure precheck
|
||||
propka.pdb.protein_precheck(self.conformations,
|
||||
self.conformation_names)
|
||||
protein_precheck(
|
||||
self.conformations, self.conformation_names)
|
||||
# set up atom bonding and protonation
|
||||
self.version.setup_bonding_and_protonation(self)
|
||||
# Extract groups
|
||||
@@ -79,9 +81,8 @@ class Molecular_container:
|
||||
write_input(self, filename)
|
||||
elif input_file_extension == '.propka_input':
|
||||
#input is a propka_input file
|
||||
[self.conformations, self.conformation_names] = (
|
||||
propka.pdb.read_input(input_file, self.version.parameters,
|
||||
self))
|
||||
[self.conformations, self.conformation_names] = read_input(
|
||||
input_file, self.version.parameters, self)
|
||||
# Extract groups - this merely sets up the groups found in the
|
||||
# input file
|
||||
self.extract_groups()
|
||||
|
||||
@@ -1,6 +1,42 @@
|
||||
"""Output routines."""
|
||||
from datetime import date
|
||||
from propka.lib import info, open_file_for_writing
|
||||
from propka.lib import info
|
||||
|
||||
|
||||
def open_file_for_writing(input_file):
|
||||
"""Open file or file-like stream for writing.
|
||||
|
||||
TODO - convert this to a context manager.
|
||||
|
||||
Args:
|
||||
input_file: path to file or file-like object. If file-like object,
|
||||
then will attempt to get file mode.
|
||||
"""
|
||||
try:
|
||||
mode = input_file.mode
|
||||
if not ("w" in mode or "a" in mode or "+" in mode):
|
||||
raise IOError("File/stream not open for writing")
|
||||
return input_file
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
file_ = open(input_file, 'wt')
|
||||
except FileNotFoundError:
|
||||
raise Exception('Could not open {0:s}'.format(input_file))
|
||||
return file_
|
||||
|
||||
|
||||
def write_file(filename, lines):
|
||||
"""Writes a new file.
|
||||
|
||||
Args:
|
||||
filename: name of file
|
||||
lines: lines to write to file
|
||||
"""
|
||||
file_ = open_file_for_writing(filename)
|
||||
for line in lines:
|
||||
file_.write("{0:s}\n".format(line))
|
||||
file_.close()
|
||||
|
||||
|
||||
def print_header():
|
||||
|
||||
@@ -35,7 +35,7 @@ STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
|
||||
class Parameters:
|
||||
"""PROPKA parameter class."""
|
||||
|
||||
def __init__(self, parameter_file):
|
||||
def __init__(self):
|
||||
"""Initialize parameter class.
|
||||
|
||||
Args:
|
||||
@@ -52,22 +52,6 @@ class Parameters:
|
||||
self.CYS_CYS_exception = None
|
||||
# These functions set up remaining data structures implicitly
|
||||
self.set_up_data_structures()
|
||||
self.read_parameters(parameter_file)
|
||||
|
||||
def read_parameters(self, file_):
|
||||
"""Read parameters from file.
|
||||
|
||||
Args:
|
||||
file_: file to read
|
||||
"""
|
||||
# try to locate the parameters file
|
||||
try:
|
||||
ifile = pkg_resources.resource_filename(__name__, file_)
|
||||
input_ = lib.open_file_for_reading(ifile)
|
||||
except (IOError, FileNotFoundError, ValueError):
|
||||
input_ = lib.open_file_for_reading(file_)
|
||||
for line in input_:
|
||||
self.parse_line(line)
|
||||
|
||||
def parse_line(self, line):
|
||||
"""Parse parameter file line."""
|
||||
|
||||
Reference in New Issue
Block a user