diff --git a/propka/input.py b/propka/input.py index 8d70a55..de17149 100644 --- a/propka/input.py +++ b/propka/input.py @@ -20,10 +20,10 @@ def open_file_for_reading(input_file): Args: input_file: path to file or file-like object. If file-like object, - then will attempt fseek(0). + then will attempt seek(0). """ try: - input_file.fseek(0) + input_file.seek(0) return input_file except AttributeError: pass @@ -35,27 +35,62 @@ def open_file_for_reading(input_file): return file_ -def read_molecule_file(input_file, mol_container): - """Read input file (PDB or PROPKA) for a molecular container +def read_molecule_file(filename: str, mol_container, stream=None): + """Read input file or stream (PDB or PROPKA) for a molecular container - Args - input_file: input file to read - mol_container: MolecularContainer object - Returns + Args: + filename(str): name of input file. If not using a filestream via the + ``stream`` argument, should be a path to the file to be read. + mol_container: MolecularContainer object. + stream: optional filestream handle. If ``None``, then open + ``filename`` as a local file for reading. + + Returns: updated MolecularContainer object - Raises + + Raises: ValuError if invalid input given + + Examples: + There are two main cases for using ``read_molecule_file``. The first + (and most common) is to pass the input file (``filename``) as a + string which gives the path of the molecule file to be read (here we + also pass a ``MolecularContainer`` object named ``mol_container``). + + >>> read_molecule_file('test.pdb', mol_container) + + + The other use case is when passing a file-like object, e.g. a + ``StringIO`` instance. This is done by passing the object via + the ``stream`` argument. Since file-like objects do not usually have + an associated file name, an appropriate file name should be passed to + the ``filename`` argument.
In this case, ``filename`` is not opened for + reading, but instead is used to help recognise the file type (based on + the extension being either ``.pdb`` or ``.propka_input``) and is also + used to assign a name to the input + MolecularContainer object. + + >>> read_molecule_file('test.pdb', mol_container, + stream=string_io_object) + + """ - input_path = Path(input_file) + input_path = Path(filename) mol_container.name = input_path.stem input_file_extension = input_path.suffix + + if stream is not None: + input_file = stream + else: + input_file = filename + if input_file_extension.lower() == '.pdb': # input is a pdb file. read in atoms and top up containers to make # sure that all atoms are present in all conformations conformations, conformation_names = read_pdb( - input_path, mol_container.version.parameters, mol_container) + input_file, mol_container.version.parameters, mol_container) if len(conformations) == 0: - str_ = ('Error: The pdb file does not seems to contain any ' + str_ = ('Error: The pdb file does not seem to contain any ' 'molecular conformations') raise ValueError(str_) mol_container.conformations = conformations diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/test_streamio.py b/tests/test_streamio.py new file mode 100644 index 0000000..13b302b --- /dev/null +++ b/tests/test_streamio.py @@ -0,0 +1,120 @@ +"""Tests for PROPKA stream io""" +import logging +from pathlib import Path +from io import StringIO +import pytest +from propka.parameters import Parameters +from propka.molecular_container import MolecularContainer +from propka.input import read_parameter_file, read_molecule_file +from propka.lib import loadOptions + +from .test_basic_regression import get_test_dirs, compare_output + + +_LOGGER = logging.getLogger(__name__) + + +def get_paths(pdb): + """Helper function to get the path to the input and
reference files""" + path_dict = get_test_dirs() + ref_path = path_dict["results"] / ("{0:s}.dat".format(pdb)) + pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb)) + + return ref_path.resolve(), pdb_path.resolve() + + +def run_propka_stream(options, input_file, filename): + """Run PROPKA software. + + Args: + options: list of PROPKA options + input_file: file-like PDB object + filename: filename for the file-like PDB object + """ + options += [filename] + args = loadOptions(options) + parameters = read_parameter_file(args.parameters, Parameters()) + molecule = MolecularContainer(parameters, args) + molecule = read_molecule_file(filename, molecule, stream=input_file) + molecule.calculate_pka() + molecule.write_pka() + if args.generate_propka_input: + molecule.write_propka() + + +@pytest.mark.parametrize("pdb, options", [ + pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"), + pytest.param('3SGB-subset', [ + "--titrate_only", + "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], + id="3SGB: --titrate_only"), + pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"), +]) +def test_textio_filestream(tmpdir, pdb, options): + """Basic regression test using TextIO streams for the input PDB file""" + # Get the relevant paths + ref_path, pdb_path = get_paths(pdb) + filename = f"{pdb}.pdb" + + filestream = open(pdb_path, 'r') + + with tmpdir.as_cwd(): + run_propka_stream(options, filestream, filename) + compare_output(pdb, Path.cwd(), ref_path) + + filestream.close() + + +@pytest.mark.parametrize("pdb, options", [ + pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"), + pytest.param('3SGB-subset', [ + "--titrate_only", + "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], + id="3SGB: --titrate_only"), + pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"), +]) +def test_stringio_filestream(tmpdir, pdb, options): + """Basic regression test using StringIO streams for the input PDB file""" + # Get the 
relevant paths + ref_path, pdb_path = get_paths(pdb) + filename = f"{pdb}.pdb" + + with open(pdb_path, 'r') as writer: + filestream = StringIO(writer.read()) + + with tmpdir.as_cwd(): + run_propka_stream(options, filestream, filename) + compare_output(pdb, Path.cwd(), ref_path) + + filestream.close() + + +def test_valuerror_nofiletype(): + """Tests for raised ValueError when an unknown filename is passed to + read_molecule_file""" + pdb = "1FTJ-Chain-A" + options = [] + + ref_path, pdb_path = get_paths(pdb) + + with open(pdb_path, 'r') as writer: + filestream = StringIO(writer.read()) + + errmsg = "Unknown input file type" + with pytest.raises(ValueError, match=errmsg): + run_propka_stream(options, filestream, filename="test.dat") + + +def test_valuerror_notpdb(): + """Tests for raised ValueError when a stream object that isn't a PDB + is passed to read_molecule_file""" + pdb = "1FTJ-Chain-A" + options = [] + + ref_path, pdb_path = get_paths(pdb) + + filestream = StringIO() + + errmsg = "The pdb file does not seem to contain any " + with pytest.raises(ValueError, match=errmsg): + run_propka_stream(options, filestream, filename="test.pdb")