Remove cyclic import based on I/O in pdb.py.
Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
This commit is contained in:
@@ -1,96 +1,58 @@
|
|||||||
"""Read and parse PDB-like input files."""
|
"""Input routines."""
|
||||||
import propka.lib
|
from pkg_resources import resource_filename
|
||||||
from propka.lib import warning
|
|
||||||
from propka.atom import Atom
|
from propka.atom import Atom
|
||||||
from propka.group import initialize_atom_group
|
|
||||||
from propka.conformation_container import ConformationContainer
|
from propka.conformation_container import ConformationContainer
|
||||||
|
from propka.group import initialize_atom_group
|
||||||
|
|
||||||
|
|
||||||
EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
|
def open_file_for_reading(input_file):
|
||||||
'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
|
"""Open file or file-like stream for reading.
|
||||||
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
|
|
||||||
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
|
|
||||||
|
|
||||||
|
TODO - convert this to a context manager
|
||||||
def read_pdb(pdb_file, parameters, molecule):
|
|
||||||
"""Parse a PDB file.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
pdb_file: file to read
|
input_file: path to file or file-like object. If file-like object,
|
||||||
parameters: parameters to guide parsing
|
then will attempt fseek(0).
|
||||||
molecule: molecular container
|
"""
|
||||||
|
try:
|
||||||
|
input_file.fseek(0)
|
||||||
|
return input_file
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_ = open(input_file, 'rt')
|
||||||
|
except:
|
||||||
|
raise IOError('Cannot find file {0:s}'.format(input_file))
|
||||||
|
return file_
|
||||||
|
|
||||||
|
|
||||||
|
def read_parameter_file(input_file, parameters):
|
||||||
|
"""Read a parameter file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input_file: input file to read
|
||||||
|
parameters: Parameters object
|
||||||
Returns:
|
Returns:
|
||||||
list with elements:
|
updated Parameters object
|
||||||
1. list of conformations
|
|
||||||
2. list of names
|
|
||||||
"""
|
"""
|
||||||
conformations = {}
|
# try to locate the parameter file
|
||||||
# read in all atoms in the file
|
try:
|
||||||
lines = get_atom_lines_from_pdb(
|
ifile = resource_filename(__name__, input_file)
|
||||||
pdb_file, ignore_residues=parameters.ignore_residues,
|
input_ = open_file_for_reading(ifile)
|
||||||
keep_protons=molecule.options.keep_protons,
|
except (IOError, FileNotFoundError, ValueError):
|
||||||
chains=molecule.options.chains)
|
input_ = open_file_for_reading(input_file)
|
||||||
for (name, atom) in lines:
|
for line in input_:
|
||||||
if not name in conformations.keys():
|
parameters.parse_line(line)
|
||||||
conformations[name] = ConformationContainer(
|
return parameters
|
||||||
name=name, parameters=parameters, molecular_container=molecule)
|
|
||||||
conformations[name].add_atom(atom)
|
|
||||||
# make a sorted list of conformation names
|
|
||||||
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
|
|
||||||
return [conformations, names]
|
|
||||||
|
|
||||||
|
|
||||||
def protein_precheck(conformations, names):
|
|
||||||
"""Check protein for correct number of atoms, etc.
|
|
||||||
|
|
||||||
Args:
|
def conformation_sorter(conf):
|
||||||
names: conformation names to check
|
"""TODO - figure out what this function does."""
|
||||||
"""
|
model = int(conf[:-1])
|
||||||
for name in names:
|
altloc = conf[-1:]
|
||||||
atoms = conformations[name].atoms
|
return model*100+ord(altloc)
|
||||||
# Group the atoms by their residue:
|
|
||||||
atoms_by_residue = {}
|
|
||||||
for atom in atoms:
|
|
||||||
if atom.element != 'H':
|
|
||||||
res_id = resid_from_atom(atom)
|
|
||||||
try:
|
|
||||||
atoms_by_residue[res_id].append(atom)
|
|
||||||
except KeyError:
|
|
||||||
atoms_by_residue[res_id] = [atom]
|
|
||||||
for res_id, res_atoms in atoms_by_residue.items():
|
|
||||||
res_name = res_atoms[0].res_name
|
|
||||||
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
|
|
||||||
# ignore ligand residues
|
|
||||||
if res_name not in EXPECTED_ATOM_NUMBERS:
|
|
||||||
continue
|
|
||||||
# check for c-terminal
|
|
||||||
if 'C-' in [a.terminal for a in res_atoms]:
|
|
||||||
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
|
|
||||||
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
|
||||||
"{res:s} in conformation {conf:s}".format(
|
|
||||||
num=len(res_atoms), res=residue_label,
|
|
||||||
conf=name))
|
|
||||||
warning(str_)
|
|
||||||
continue
|
|
||||||
# check number of atoms in residue
|
|
||||||
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
|
|
||||||
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
|
||||||
"{res:s} in conformation {conf:s}".format(
|
|
||||||
num=len(res_atoms), res=residue_label,
|
|
||||||
conf=name))
|
|
||||||
warning(str_)
|
|
||||||
|
|
||||||
|
|
||||||
def resid_from_atom(atom):
|
|
||||||
"""Return string with atom residue information.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
atom: atom to generate string for
|
|
||||||
Returns
|
|
||||||
string
|
|
||||||
"""
|
|
||||||
return '{0:>4d} {1:s} {2:s}'.format(
|
|
||||||
atom.res_num, atom.chain_id, atom.icode)
|
|
||||||
|
|
||||||
|
|
||||||
def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
|
def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
|
||||||
@@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
|
|||||||
tags: tags of lines that include atoms
|
tags: tags of lines that include atoms
|
||||||
chains: list of chains
|
chains: list of chains
|
||||||
"""
|
"""
|
||||||
lines = propka.lib.open_file_for_reading(pdb_file).readlines()
|
lines = open_file_for_reading(pdb_file).readlines()
|
||||||
nterm_residue = 'next_residue'
|
nterm_residue = 'next_residue'
|
||||||
old_residue = None
|
old_residue = None
|
||||||
terminal = None
|
terminal = None
|
||||||
@@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule):
|
|||||||
molecular_container=molecule)
|
molecular_container=molecule)
|
||||||
conformations[name].add_atom(atom)
|
conformations[name].add_atom(atom)
|
||||||
# make a sorted list of conformation names
|
# make a sorted list of conformation names
|
||||||
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
|
names = sorted(conformations.keys(), key=conformation_sorter)
|
||||||
return [conformations, names]
|
return [conformations, names]
|
||||||
|
|
||||||
|
|
||||||
@@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
|
|||||||
Yields:
|
Yields:
|
||||||
conformation container, list of atoms
|
conformation container, list of atoms
|
||||||
"""
|
"""
|
||||||
lines = propka.lib.open_file_for_reading(input_file).readlines()
|
lines = open_file_for_reading(input_file).readlines()
|
||||||
conformation = ''
|
conformation = ''
|
||||||
atoms = {}
|
atoms = {}
|
||||||
numbers = []
|
numbers = []
|
||||||
@@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
|
|||||||
# prepare for next conformation
|
# prepare for next conformation
|
||||||
atoms = {}
|
atoms = {}
|
||||||
numbers = []
|
numbers = []
|
||||||
|
|
||||||
|
def read_pdb(pdb_file, parameters, molecule):
    """Parse a PDB file.

    Args:
        pdb_file:  file to read
        parameters:  parameters to guide parsing
        molecule:  molecular container
    Returns:
        list with elements:
            1. dictionary of conformation containers keyed by conformation
               name
            2. list of conformation names sorted with conformation_sorter
    """
    conformations = {}
    # read in all atoms in the file
    lines = get_atom_lines_from_pdb(
        pdb_file, ignore_residues=parameters.ignore_residues,
        keep_protons=molecule.options.keep_protons,
        chains=molecule.options.chains)
    for (name, atom) in lines:
        # create the container the first time a conformation name is seen
        if name not in conformations:
            conformations[name] = ConformationContainer(
                name=name, parameters=parameters,
                molecular_container=molecule)
        conformations[name].add_atom(atom)
    # make a sorted list of conformation names
    names = sorted(conformations, key=conformation_sorter)
    return [conformations, names]
|
||||||
|
|
||||||
|
|
||||||
102
propka/lib.py
102
propka/lib.py
@@ -11,56 +11,63 @@ _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
|
|||||||
_LOGGER.addHandler(_STDOUT_HANDLER)
|
_LOGGER.addHandler(_STDOUT_HANDLER)
|
||||||
|
|
||||||
|
|
||||||
def open_file_for_reading(input_file):
|
EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
|
||||||
"""Open file or file-like stream for reading.
|
'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
|
||||||
|
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
|
||||||
|
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
|
||||||
|
|
||||||
TODO - convert this to a context manager
|
|
||||||
|
def protein_precheck(conformations, names):
|
||||||
|
"""Check protein for correct number of atoms, etc.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
input_file: path to file or file-like object. If file-like object,
|
names: conformation names to check
|
||||||
then will attempt fseek(0).
|
|
||||||
"""
|
"""
|
||||||
try:
|
for name in names:
|
||||||
input_file.fseek(0)
|
atoms = conformations[name].atoms
|
||||||
return input_file
|
# Group the atoms by their residue:
|
||||||
except AttributeError:
|
atoms_by_residue = {}
|
||||||
pass
|
for atom in atoms:
|
||||||
|
if atom.element != 'H':
|
||||||
try:
|
res_id = resid_from_atom(atom)
|
||||||
file_ = open(input_file, 'rt')
|
try:
|
||||||
except:
|
atoms_by_residue[res_id].append(atom)
|
||||||
raise IOError('Cannot find file {0:s}'.format(input_file))
|
except KeyError:
|
||||||
return file_
|
atoms_by_residue[res_id] = [atom]
|
||||||
|
for res_id, res_atoms in atoms_by_residue.items():
|
||||||
|
res_name = res_atoms[0].res_name
|
||||||
|
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
|
||||||
|
# ignore ligand residues
|
||||||
|
if res_name not in EXPECTED_ATOM_NUMBERS:
|
||||||
|
continue
|
||||||
|
# check for c-terminal
|
||||||
|
if 'C-' in [a.terminal for a in res_atoms]:
|
||||||
|
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
|
||||||
|
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
||||||
|
"{res:s} in conformation {conf:s}".format(
|
||||||
|
num=len(res_atoms), res=residue_label,
|
||||||
|
conf=name))
|
||||||
|
warning(str_)
|
||||||
|
continue
|
||||||
|
# check number of atoms in residue
|
||||||
|
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
|
||||||
|
str_ = ("Unexpected number ({num:d}) of atoms in residue "
|
||||||
|
"{res:s} in conformation {conf:s}".format(
|
||||||
|
num=len(res_atoms), res=residue_label,
|
||||||
|
conf=name))
|
||||||
|
warning(str_)
|
||||||
|
|
||||||
|
|
||||||
def open_file_for_writing(input_file):
|
def resid_from_atom(atom):
|
||||||
"""Open file or file-like stream for writing.
|
"""Return string with atom residue information.
|
||||||
|
|
||||||
TODO - convert this to a context manager.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
input_file: path to file or file-like object. If file-like object,
|
atom: atom to generate string for
|
||||||
then will attempt to get file mode.
|
Returns
|
||||||
|
string
|
||||||
"""
|
"""
|
||||||
try:
|
return '{0:>4d} {1:s} {2:s}'.format(
|
||||||
mode = input_file.mode
|
atom.res_num, atom.chain_id, atom.icode)
|
||||||
if not ("w" in mode or "a" in mode or "+" in mode):
|
|
||||||
raise IOError("File/stream not open for writing")
|
|
||||||
return input_file
|
|
||||||
except AttributeError:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
file_ = open(input_file, 'wt')
|
|
||||||
except FileNotFoundError:
|
|
||||||
raise Exception('Could not open {0:s}'.format(input_file))
|
|
||||||
return file_
|
|
||||||
|
|
||||||
|
|
||||||
def conformation_sorter(conf):
|
|
||||||
"""TODO - figure out what this function does."""
|
|
||||||
model = int(conf[:-1])
|
|
||||||
altloc = conf[-1:]
|
|
||||||
return model*100+ord(altloc)
|
|
||||||
|
|
||||||
|
|
||||||
def split_atoms_into_molecules(atoms):
|
def split_atoms_into_molecules(atoms):
|
||||||
@@ -354,19 +361,6 @@ def configuration_compare(conf):
|
|||||||
return 100*int(conf[1:-2]) + ord(conf[-1])
|
return 100*int(conf[1:-2]) + ord(conf[-1])
|
||||||
|
|
||||||
|
|
||||||
def write_file(filename, lines):
|
|
||||||
"""Writes a new file.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
filename: name of file
|
|
||||||
lines: lines to write to file
|
|
||||||
"""
|
|
||||||
file_ = open_file_for_writing(filename)
|
|
||||||
for line in lines:
|
|
||||||
file_.write("{0:s}\n".format(line))
|
|
||||||
file_.close()
|
|
||||||
|
|
||||||
|
|
||||||
def _args_to_str(arg_list):
|
def _args_to_str(arg_list):
|
||||||
"""Summarize list of arguments in string.
|
"""Summarize list of arguments in string.
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import sys
|
|||||||
import propka.molecular_container
|
import propka.molecular_container
|
||||||
import propka.calculations
|
import propka.calculations
|
||||||
import propka.parameters
|
import propka.parameters
|
||||||
import propka.pdb
|
|
||||||
from propka.output import write_mol2_for_atoms
|
from propka.output import write_mol2_for_atoms
|
||||||
from propka.lib import info, warning
|
from propka.lib import info, warning
|
||||||
|
|
||||||
|
|||||||
@@ -2,10 +2,11 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import propka.version
|
import propka.version
|
||||||
from propka.pdb import read_input
|
from propka.input import read_pdb, read_input, read_parameter_file
|
||||||
|
from propka.parameters import Parameters
|
||||||
from propka.output import write_input
|
from propka.output import write_input
|
||||||
from propka.conformation_container import ConformationContainer
|
from propka.conformation_container import ConformationContainer
|
||||||
from propka.lib import info, warning, make_grid
|
from propka.lib import info, warning, protein_precheck, make_grid
|
||||||
|
|
||||||
|
|
||||||
# TODO - these are constants whose origins are a little murky
|
# TODO - these are constants whose origins are a little murky
|
||||||
@@ -38,11 +39,12 @@ class Molecular_container:
|
|||||||
self.file = os.path.split(input_file)[1]
|
self.file = os.path.split(input_file)[1]
|
||||||
self.name = self.file[0:self.file.rfind('.')]
|
self.name = self.file[0:self.file.rfind('.')]
|
||||||
input_file_extension = input_file[input_file.rfind('.'):]
|
input_file_extension = input_file[input_file.rfind('.'):]
|
||||||
# set the version
|
parameters = Parameters()
|
||||||
if options:
|
if options:
|
||||||
parameters = propka.parameters.Parameters(self.options.parameters)
|
parameters = read_parameter_file(
|
||||||
|
self.options.parameters, parameters)
|
||||||
else:
|
else:
|
||||||
parameters = propka.parameters.Parameters('propka.cfg')
|
parameters = read_parameter_file('propka.cfg', parameters)
|
||||||
try:
|
try:
|
||||||
version_class = getattr(propka.version, parameters.version)
|
version_class = getattr(propka.version, parameters.version)
|
||||||
self.version = version_class(parameters)
|
self.version = version_class(parameters)
|
||||||
@@ -56,15 +58,15 @@ class Molecular_container:
|
|||||||
# input is a pdb file. read in atoms and top up containers to make
|
# input is a pdb file. read in atoms and top up containers to make
|
||||||
# sure that all atoms are present in all conformations
|
# sure that all atoms are present in all conformations
|
||||||
[self.conformations, self.conformation_names] = (
|
[self.conformations, self.conformation_names] = (
|
||||||
propka.pdb.read_pdb(input_file, self.version.parameters, self))
|
read_pdb(input_file, self.version.parameters, self))
|
||||||
if len(self.conformations) == 0:
|
if len(self.conformations) == 0:
|
||||||
info('Error: The pdb file does not seems to contain any '
|
info('Error: The pdb file does not seems to contain any '
|
||||||
'molecular conformations')
|
'molecular conformations')
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
self.top_up_conformations()
|
self.top_up_conformations()
|
||||||
# make a structure precheck
|
# make a structure precheck
|
||||||
propka.pdb.protein_precheck(self.conformations,
|
protein_precheck(
|
||||||
self.conformation_names)
|
self.conformations, self.conformation_names)
|
||||||
# set up atom bonding and protonation
|
# set up atom bonding and protonation
|
||||||
self.version.setup_bonding_and_protonation(self)
|
self.version.setup_bonding_and_protonation(self)
|
||||||
# Extract groups
|
# Extract groups
|
||||||
@@ -79,9 +81,8 @@ class Molecular_container:
|
|||||||
write_input(self, filename)
|
write_input(self, filename)
|
||||||
elif input_file_extension == '.propka_input':
|
elif input_file_extension == '.propka_input':
|
||||||
#input is a propka_input file
|
#input is a propka_input file
|
||||||
[self.conformations, self.conformation_names] = (
|
[self.conformations, self.conformation_names] = read_input(
|
||||||
propka.pdb.read_input(input_file, self.version.parameters,
|
input_file, self.version.parameters, self)
|
||||||
self))
|
|
||||||
# Extract groups - this merely sets up the groups found in the
|
# Extract groups - this merely sets up the groups found in the
|
||||||
# input file
|
# input file
|
||||||
self.extract_groups()
|
self.extract_groups()
|
||||||
|
|||||||
@@ -1,6 +1,42 @@
|
|||||||
"""Output routines."""
|
"""Output routines."""
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from propka.lib import info, open_file_for_writing
|
from propka.lib import info
|
||||||
|
|
||||||
|
|
||||||
|
def open_file_for_writing(input_file):
    """Open file or file-like stream for writing.

    TODO - convert this to a context manager.

    Args:
        input_file:  path to file or file-like object.  If the object has a
            ``mode`` attribute, the mode is checked for write access.  An
            object without ``mode`` but with a ``write`` method (e.g.
            ``io.StringIO``) is returned unchanged.
    Returns:
        file-like object that can be written to
    Raises:
        IOError:  if the stream's mode does not allow writing or the path
            cannot be opened because a directory component is missing
    """
    mode = getattr(input_file, "mode", None)
    if mode is not None:
        if not ("w" in mode or "a" in mode or "+" in mode):
            raise IOError("File/stream not open for writing")
        return input_file
    # In-memory streams expose write() but no mode; passing them to open()
    # would raise TypeError, so hand them back as they are.
    if hasattr(input_file, "write"):
        return input_file
    try:
        return open(input_file, 'wt')
    except FileNotFoundError as err:
        # {0!s} instead of {0:s} so path-like objects can be formatted too
        raise IOError(
            'Could not open {0!s}'.format(input_file)) from err
|
||||||
|
|
||||||
|
|
||||||
|
def write_file(filename, lines):
    """Writes a new file.

    Each entry of ``lines`` is written followed by a newline.  The file (or
    stream) returned by open_file_for_writing is always closed afterwards,
    even if writing fails part-way through.

    Args:
        filename:  name of file
        lines:  lines to write to file
    """
    file_ = open_file_for_writing(filename)
    try:
        for line in lines:
            file_.write("{0:s}\n".format(line))
    finally:
        # close in finally so a failed write does not leak the handle
        file_.close()
|
||||||
|
|
||||||
|
|
||||||
def print_header():
|
def print_header():
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
|
|||||||
class Parameters:
|
class Parameters:
|
||||||
"""PROPKA parameter class."""
|
"""PROPKA parameter class."""
|
||||||
|
|
||||||
def __init__(self, parameter_file):
|
def __init__(self):
|
||||||
"""Initialize parameter class.
|
"""Initialize parameter class.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -52,22 +52,6 @@ class Parameters:
|
|||||||
self.CYS_CYS_exception = None
|
self.CYS_CYS_exception = None
|
||||||
# These functions set up remaining data structures implicitly
|
# These functions set up remaining data structures implicitly
|
||||||
self.set_up_data_structures()
|
self.set_up_data_structures()
|
||||||
self.read_parameters(parameter_file)
|
|
||||||
|
|
||||||
def read_parameters(self, file_):
|
|
||||||
"""Read parameters from file.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_: file to read
|
|
||||||
"""
|
|
||||||
# try to locate the parameters file
|
|
||||||
try:
|
|
||||||
ifile = pkg_resources.resource_filename(__name__, file_)
|
|
||||||
input_ = lib.open_file_for_reading(ifile)
|
|
||||||
except (IOError, FileNotFoundError, ValueError):
|
|
||||||
input_ = lib.open_file_for_reading(file_)
|
|
||||||
for line in input_:
|
|
||||||
self.parse_line(line)
|
|
||||||
|
|
||||||
def parse_line(self, line):
|
def parse_line(self, line):
|
||||||
"""Parse parameter file line."""
|
"""Parse parameter file line."""
|
||||||
|
|||||||
Reference in New Issue
Block a user