Merge pull request #84 from IAlibay/streams
Allows for file-like objects to be passed to read_molecule_file
This commit is contained in:
@@ -20,10 +20,10 @@ def open_file_for_reading(input_file):
|
||||
|
||||
Args:
|
||||
input_file: path to file or file-like object. If file-like object,
|
||||
then will attempt fseek(0).
|
||||
then will attempt seek(0).
|
||||
"""
|
||||
try:
|
||||
input_file.fseek(0)
|
||||
input_file.seek(0)
|
||||
return input_file
|
||||
except AttributeError:
|
||||
pass
|
||||
@@ -35,27 +35,62 @@ def open_file_for_reading(input_file):
|
||||
return file_
|
||||
|
||||
|
||||
def read_molecule_file(input_file, mol_container):
|
||||
"""Read input file (PDB or PROPKA) for a molecular container
|
||||
def read_molecule_file(filename: str, mol_container, stream=None):
|
||||
"""Read input file or stream (PDB or PROPKA) for a molecular container
|
||||
|
||||
Args
|
||||
input_file: input file to read
|
||||
mol_container: MolecularContainer object
|
||||
Returns
|
||||
Args:
|
||||
filename(str): name of input file. If not using a filestream via the
|
||||
``stream`` argument, should be a path to the file to be read.
|
||||
mol_container: MolecularContainer object.
|
||||
stream: optional filestream handle. If ``None``, then open
|
||||
``filename`` as a local file for reading.
|
||||
|
||||
Returns:
|
||||
updated MolecularContainer object
|
||||
Raises
|
||||
|
||||
Raises:
|
||||
ValueError if invalid input given
|
||||
|
||||
Examples:
|
||||
There are two main cases for using ``read_molecule_file``. The first
|
||||
(and most common) is to pass the input file (``filename``) as a
|
||||
string which gives the path of the molecule file to be read (here we
|
||||
also pass a ``MolecularContainer`` object named ``mol_container``).
|
||||
|
||||
>>> read_molecule_file('test.pdb', mol_container)
|
||||
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>
|
||||
|
||||
The other use case is when passing a file-like object, e.g. a
|
||||
``StringIO`` class instance. This is done by passing the object via
|
||||
the ``stream`` argument. Since file-like objects do not usually have
|
||||
an associated file name, an appropriate file name should be passed to
|
||||
the ``filename`` argument. In this case, ``filename`` is not opened for
|
||||
reading, but instead is used to help recognise the file type (based on
|
||||
the extension being either `.pdb` or `.propka_input`) and also uses
|
||||
that given ``filename`` to assign a name to the input
|
||||
MolecularContainer object.
|
||||
|
||||
>>> read_molecule_file('test.pdb', mol_container,
|
||||
stream=string_io_object)
|
||||
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>
|
||||
|
||||
"""
|
||||
input_path = Path(input_file)
|
||||
input_path = Path(filename)
|
||||
mol_container.name = input_path.stem
|
||||
input_file_extension = input_path.suffix
|
||||
|
||||
if stream is not None:
|
||||
input_file = stream
|
||||
else:
|
||||
input_file = filename
|
||||
|
||||
if input_file_extension.lower() == '.pdb':
|
||||
# input is a pdb file. read in atoms and top up containers to make
|
||||
# sure that all atoms are present in all conformations
|
||||
conformations, conformation_names = read_pdb(
|
||||
input_path, mol_container.version.parameters, mol_container)
|
||||
input_file, mol_container.version.parameters, mol_container)
|
||||
if len(conformations) == 0:
|
||||
str_ = ('Error: The pdb file does not seems to contain any '
|
||||
str_ = ('Error: The pdb file does not seem to contain any '
|
||||
'molecular conformations')
|
||||
raise ValueError(str_)
|
||||
mol_container.conformations = conformations
|
||||
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
120
tests/test_streamio.py
Normal file
120
tests/test_streamio.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""Tests for PROPKA stream io"""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from io import StringIO
|
||||
import pytest
|
||||
from propka.parameters import Parameters
|
||||
from propka.molecular_container import MolecularContainer
|
||||
from propka.input import read_parameter_file, read_molecule_file
|
||||
from propka.lib import loadOptions
|
||||
|
||||
from .test_basic_regression import get_test_dirs, compare_output
|
||||
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_paths(pdb):
    """Locate the reference results file and the input PDB file for a test.

    Args:
        pdb: base name (without extension) of the PDB test case.

    Returns:
        tuple ``(ref_path, pdb_path)`` of resolved paths: the ``.dat``
        reference file under the "results" directory and the ``.pdb`` input
        file under the "pdbs" directory, as reported by ``get_test_dirs``.
    """
    test_dirs = get_test_dirs()
    reference = test_dirs["results"] / f"{pdb:s}.dat"
    structure = test_dirs["pdbs"] / f"{pdb:s}.pdb"
    return reference.resolve(), structure.resolve()
|
||||
|
||||
|
||||
def run_propka_stream(options, input_file, filename):
    """Run PROPKA software on a file-like input.

    Args:
        options: list of PROPKA options. The list is left unmodified.
        input_file: file-like PDB object
        filename: filename for the file-like PDB object; appended to the
            options handed to ``loadOptions`` and passed to
            ``read_molecule_file`` alongside the stream.
    """
    # Build a fresh argument list instead of ``options += [filename]``: the
    # in-place form appends to the caller's list, which for parametrized
    # tests is the very list object stored in the parametrize table.
    args = loadOptions([*options, filename])
    parameters = read_parameter_file(args.parameters, Parameters())
    molecule = MolecularContainer(parameters, args)
    molecule = read_molecule_file(filename, molecule, stream=input_file)
    molecule.calculate_pka()
    molecule.write_pka()
    if args.generate_propka_input:
        molecule.write_propka()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"),
])
def test_textio_filestream(tmpdir, pdb, options):
    """Basic regression test using TextIO streams for the input PDB file"""
    # Get the relevant paths
    ref_path, pdb_path = get_paths(pdb)
    filename = f"{pdb}.pdb"

    # Open the stream in a ``with`` block so the handle is closed even when
    # the PROPKA run or the output comparison raises.
    with open(pdb_path, 'r') as filestream:
        # Run inside the temporary directory; the output directory handed to
        # ``compare_output`` is the current working directory.
        with tmpdir.as_cwd():
            run_propka_stream(options, filestream, filename)
            compare_output(pdb, Path.cwd(), ref_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"),
])
def test_stringio_filestream(tmpdir, pdb, options):
    """Basic regression test using StringIO streams for the input PDB file"""
    # Get the relevant paths
    ref_path, pdb_path = get_paths(pdb)
    filename = f"{pdb}.pdb"

    # Load the PDB text into memory so PROPKA only ever sees a StringIO.
    with open(pdb_path, 'r') as reader:
        pdb_text = reader.read()

    # ``with`` guarantees the in-memory stream is closed even when the
    # PROPKA run or the output comparison raises.
    with StringIO(pdb_text) as filestream:
        with tmpdir.as_cwd():
            run_propka_stream(options, filestream, filename)
            compare_output(pdb, Path.cwd(), ref_path)
|
||||
|
||||
|
||||
def test_valuerror_nofiletype():
    """Tests for raised ValueError when an unknown filename is passed to
    read_molecule_file"""
    pdb = "1FTJ-Chain-A"
    options = []

    # Only the input PDB is needed here; the reference results path is unused.
    _, pdb_path = get_paths(pdb)

    with open(pdb_path, 'r') as reader:
        pdb_text = reader.read()

    errmsg = "Unknown input file type"
    # The stream holds PDB text, but the ".dat" filename passed alongside it
    # is what the test expects to trigger the error.
    with StringIO(pdb_text) as filestream:
        with pytest.raises(ValueError, match=errmsg):
            run_propka_stream(options, filestream, filename="test.dat")
|
||||
|
||||
|
||||
def test_valuerror_notpdb():
    """Tests for raised ValueError when a stream object that isn't a PDB
    is passed to read_molecule_file"""
    options = []

    errmsg = "The pdb file does not seem to contain any "
    # An empty in-memory stream with a ".pdb" filename: no on-disk test data
    # is needed, so no test paths are looked up here.
    with StringIO() as filestream:
        with pytest.raises(ValueError, match=errmsg):
            run_propka_stream(options, filestream, filename="test.pdb")
|
||||
Reference in New Issue
Block a user