streamlines read_molecule_file logic

This commit is contained in:
IAlibay
2020-07-15 17:44:47 +01:00
parent e3e0dd9878
commit 720a209440
2 changed files with 48 additions and 36 deletions

View File

@@ -35,13 +35,15 @@ def open_file_for_reading(input_file):
return file_ return file_
def read_molecule_file(input_file, mol_container, filename=None): def read_molecule_file(filename: str, mol_container, stream=None):
"""Read input file or stream (PDB or PROPKA) for a molecular container """Read input file or stream (PDB or PROPKA) for a molecular container
Args: Args:
input_file: input file or stream to read filename(str): name of input file. If not using a filestream via the
mol_container: MolecularContainer object ``stream`` argument, should be a path to the file to be read.
filename (str): optional input filename when using a filestream mol_container: MolecularContainer object.
stream: optional filestream handle. If ``None``, then open
``filename`` as a local file for reading.
Returns: Returns:
updated MolecularContainer object updated MolecularContainer object
@@ -51,7 +53,7 @@ def read_molecule_file(input_file, mol_container, filename=None):
Examples: Examples:
There are two main cases for using ``read_molecule_file``. The first There are two main cases for using ``read_molecule_file``. The first
(and most common) is to pass the input file (``input_file``) as a (and most common) is to pass the input file (``filename``) as a
string which gives the path of the molecule file to be read (here we string which gives the path of the molecule file to be read (here we
also pass a ``MoleculeContainer`` object named ``mol_container``). also pass a ``MoleculeContainer`` object named ``mol_container``).
@@ -59,39 +61,36 @@ def read_molecule_file(input_file, mol_container, filename=None):
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310> <propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>
The other use case is when passing a file-like object, e.g. a The other use case is when passing a file-like object, e.g. a
``StringIO`` class instance as the input file. In order to decide how ``StringIO`` class instance. This is done by passing the object via
to process ``input_file``, ``read_molecule_file`` requires a file name. the ``stream`` argument. Since file-like objects do not usually have
Since file-like objects do not usually have an associated file name, we an associated file name, an appropriate file name should be passed to
must pass a value to the ``filename`` argument. This helps recognise the ``filename`` argument. In this case, ``filename`` is not opened for
the file type (based on the extension being either `.pdb` or reading, but instead is used to help recognise the file type (based on
`.propka_input`) and also associates that given ``filename`` with the the extension being either `.pdb` or `.propka_input`) and also uses
input MolecularContainer object. that given ``filename`` to assign a name to the input
MolecularContainer object.
>>> read_molecule_file(string_io_object, mol_container, >>> read_molecule_file('test.pdb', mol_container,
filename='test.pdb') stream=string_io_object)
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310> <propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>
""" """
try:
input_path = Path(input_file)
except TypeError:
try:
input_path = Path(filename) input_path = Path(filename)
except TypeError:
errmsg = ("Path of provided input_file could not be determined "
"if passing a stream-like object, please provide an "
"appropriate string for the filename argument.")
raise TypeError(errmsg) from None
mol_container.name = input_path.stem mol_container.name = input_path.stem
input_file_extension = input_path.suffix input_file_extension = input_path.suffix
if stream is not None:
input_file = stream
else:
input_file = filename
if input_file_extension.lower() == '.pdb': if input_file_extension.lower() == '.pdb':
# input is a pdb file. read in atoms and top up containers to make # input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations # sure that all atoms are present in all conformations
conformations, conformation_names = read_pdb( conformations, conformation_names = read_pdb(
input_file, mol_container.version.parameters, mol_container) input_file, mol_container.version.parameters, mol_container)
if len(conformations) == 0: if len(conformations) == 0:
str_ = ('Error: The pdb file does not seems to contain any ' str_ = ('Error: The pdb file does not seem to contain any '
'molecular conformations') 'molecular conformations')
raise ValueError(str_) raise ValueError(str_)
mol_container.conformations = conformations mol_container.conformations = conformations

View File

@@ -30,13 +30,12 @@ def run_propka_stream(options, input_file, filename):
options: list of PROPKA options options: list of PROPKA options
input_file: file-like PDB object input_file: file-like PDB object
filename: filename for the file-like PDB object filename: filename for the file-like PDB object
tmp_path: path for working directory
""" """
options += [filename] options += [filename]
args = loadOptions(options) args = loadOptions(options)
parameters = read_parameter_file(args.parameters, Parameters()) parameters = read_parameter_file(args.parameters, Parameters())
molecule = MolecularContainer(parameters, args) molecule = MolecularContainer(parameters, args)
molecule = read_molecule_file(input_file, molecule, filename) molecule = read_molecule_file(filename, molecule, stream=input_file)
molecule.calculate_pka() molecule.calculate_pka()
molecule.write_pka() molecule.write_pka()
if args.generate_propka_input: if args.generate_propka_input:
@@ -90,9 +89,9 @@ def test_stringio_filestream(tmpdir, pdb, options):
filestream.close() filestream.close()
def test_typerror_nofilename(tmpdir): def test_valuerror_nofiletype(tmpdir):
"""Tests for raised TypeError when not passing a filename to """Tests for raised ValueError when an unknown filename is passed to
read_molecule_file and using a file-like object without a name""" read_molecule_file"""
pdb = "1FTJ-Chain-A" pdb = "1FTJ-Chain-A"
options = [] options = []
@@ -101,8 +100,22 @@ def test_typerror_nofilename(tmpdir):
with open(pdb_path, 'r') as writer: with open(pdb_path, 'r') as writer:
filestream = StringIO(writer.read()) filestream = StringIO(writer.read())
with tmpdir.as_cwd(): errmsg = "Unknown input file type"
errmsg = "Path of provided input_file could not be determined" with pytest.raises(ValueError, match=errmsg):
with pytest.raises(TypeError, match=errmsg): run_propka_stream(options, filestream, filename="test.dat")
# default value of filename is None
run_propka_stream(options, filestream, filename=None)
def test_valuerror_notpdb(tmpdir):
    """Tests for raised ValueError when a stream object that isn't a PDB
    is passed to read_molecule_file.

    An empty ``StringIO`` is handed to ``run_propka_stream`` together with
    a ``.pdb`` filename, so the stream is treated as PDB input even though
    it holds no content. The call is expected to fail with the "does not
    seem to contain any molecular conformations" ValueError.

    Args:
        tmpdir: pytest temporary-directory fixture (unused here; kept so
            the test signature matches its sibling tests).
    """
    options = []
    # The stream is deliberately empty, so no fixture file has to be
    # located or opened for this check.
    filestream = StringIO()
    errmsg = "The pdb file does not seem to contain any "
    try:
        with pytest.raises(ValueError, match=errmsg):
            run_propka_stream(options, filestream, filename="test.pdb")
    finally:
        # Release the stream even if the expected error is not raised.
        filestream.close()