Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allows for file-like objects to be passed to read_molecule_file #84

Merged
merged 12 commits into from
Jul 16, 2020
59 changes: 47 additions & 12 deletions propka/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ def open_file_for_reading(input_file):

Args:
input_file: path to file or file-like object. If file-like object,
then will attempt fseek(0).
then will attempt seek(0).
"""
try:
input_file.fseek(0)
input_file.seek(0)
return input_file
except AttributeError:
pass
Expand All @@ -35,27 +35,62 @@ def open_file_for_reading(input_file):
return file_


def read_molecule_file(input_file, mol_container):
"""Read input file (PDB or PROPKA) for a molecular container
def read_molecule_file(filename: str, mol_container, stream=None):
"""Read input file or stream (PDB or PROPKA) for a molecular container

Args
input_file: input file to read
mol_container: MolecularContainer object
Returns
Args:
filename(str): name of input file. If not using a filestream via the
``stream`` argument, should be a path to the file to be read.
mol_container: MolecularContainer object.
stream: optional filestream handle. If ``None``, then open
``filename`` as a local file for reading.

Returns:
updated MolecularContainer object
Raises

Raises:
ValueError if invalid input given

Examples:
There are two main cases for using ``read_molecule_file``. The first
(and most common) is to pass the input file (``filename``) as a
string which gives the path of the molecule file to be read (here we
also pass a ``MolecularContainer`` object named ``mol_container``).

>>> read_molecule_file('test.pdb', mol_container)
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>

The other use case is when passing a file-like object, e.g. a
``StringIO`` class instance. This is done by passing the object via
the ``stream`` argument. Since file-like objects do not usually have
an associated file name, an appropriate file name should be passed to
the ``filename`` argument. In this case, ``filename`` is not opened for
reading, but instead is used to help recognise the file type (based on
the extension being either `.pdb` or `.propka_input`) and to assign a
name to the input MolecularContainer object.

>>> read_molecule_file('test.pdb', mol_container,
stream=string_io_object)
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>

"""
input_path = Path(input_file)
input_path = Path(filename)
mol_container.name = input_path.stem
input_file_extension = input_path.suffix

if stream is not None:
input_file = stream
else:
input_file = filename

if input_file_extension.lower() == '.pdb':
# input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations
conformations, conformation_names = read_pdb(
input_path, mol_container.version.parameters, mol_container)
input_file, mol_container.version.parameters, mol_container)
if len(conformations) == 0:
str_ = ('Error: The pdb file does not seems to contain any '
str_ = ('Error: The pdb file does not seem to contain any '
'molecular conformations')
raise ValueError(str_)
mol_container.conformations = conformations
Expand Down
1 change: 1 addition & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

120 changes: 120 additions & 0 deletions tests/test_streamio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Tests for PROPKA stream io"""
import logging
from pathlib import Path
from io import StringIO
import pytest
from propka.parameters import Parameters
from propka.molecular_container import MolecularContainer
from propka.input import read_parameter_file, read_molecule_file
from propka.lib import loadOptions

from .test_basic_regression import get_test_dirs, compare_output


_LOGGER = logging.getLogger(__name__)


def get_paths(pdb):
    """Return the reference-output and input-PDB paths for a test case.

    Args:
        pdb: base name of the test structure (without file extension).

    Returns:
        Tuple ``(ref_path, pdb_path)``: the resolved ``<pdb>.dat`` entry
        under the "results" test directory and the resolved ``<pdb>.pdb``
        entry under the "pdbs" test directory, as reported by
        ``get_test_dirs()``.
    """
    test_dirs = get_test_dirs()
    reference = test_dirs["results"] / "{0:s}.dat".format(pdb)
    structure = test_dirs["pdbs"] / "{0:s}.pdb".format(pdb)
    return reference.resolve(), structure.resolve()


def run_propka_stream(options, input_file, filename):
    """Run PROPKA software on a file-like input.

    Args:
        options: list of PROPKA options. The list is not modified.
        input_file: file-like PDB object, handed to ``read_molecule_file``
            through its ``stream`` argument.
        filename: filename for the file-like PDB object. It is appended to
            the options given to ``loadOptions`` and passed to
            ``read_molecule_file`` as the file name.
    """
    # Build a fresh argument list instead of ``options += [filename]``:
    # the in-place form extends the caller's list, which leaks state into
    # the option lists stored in the pytest parametrisation.
    args = loadOptions([*options, filename])
    parameters = read_parameter_file(args.parameters, Parameters())
    molecule = MolecularContainer(parameters, args)
    molecule = read_molecule_file(filename, molecule, stream=input_file)
    molecule.calculate_pka()
    molecule.write_pka()
    if args.generate_propka_input:
        molecule.write_propka()


@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"),
])
def test_textio_filestream(tmpdir, pdb, options):
    """Basic regression test using TextIO streams for the input PDB file.

    Args:
        tmpdir: pytest temporary-directory fixture; PROPKA is run with it as
            the current working directory.
        pdb: base name of the test structure.
        options: list of PROPKA command-line options for this case.
    """
    # Get the relevant paths
    ref_path, pdb_path = get_paths(pdb)
    filename = f"{pdb}.pdb"

    # Open the file through a context manager so the handle is closed even
    # when the PROPKA run or the output comparison raises.
    with open(pdb_path, 'r') as filestream:
        with tmpdir.as_cwd():
            run_propka_stream(options, filestream, filename)
            compare_output(pdb, Path.cwd(), ref_path)


@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"),
])
def test_stringio_filestream(tmpdir, pdb, options):
    """Basic regression test using StringIO streams for the input PDB file.

    The PDB file is read fully into memory and handed to PROPKA as a
    ``StringIO`` object; the output written in ``tmpdir`` is then compared
    against the reference results.
    """
    # Locate the reference output and the PDB input for this case
    ref_path, pdb_path = get_paths(pdb)
    filename = f"{pdb}.pdb"

    # Copy the on-disk PDB contents into an in-memory text stream
    with open(pdb_path, 'r') as handle:
        pdb_text = handle.read()
        filestream = StringIO(pdb_text)

    with tmpdir.as_cwd():
        run_propka_stream(options, filestream, filename)
        output_dir = Path.cwd()
        compare_output(pdb, output_dir, ref_path)

    filestream.close()


def test_valuerror_nofiletype(tmpdir):
    """Check that an unknown filename passed to read_molecule_file raises
    ValueError.

    The stream holds the contents of the 1FTJ-Chain-A PDB file but is passed
    with the filename ``test.dat``.
    """
    _, structure_path = get_paths("1FTJ-Chain-A")

    with open(structure_path, 'r') as handle:
        stream = StringIO(handle.read())

    with pytest.raises(ValueError, match="Unknown input file type"):
        run_propka_stream([], stream, filename="test.dat")
IAlibay marked this conversation as resolved.
Show resolved Hide resolved


def test_valuerror_notpdb(tmpdir):
    """Tests for raised ValueError when a stream object that isn't a PDB
    is passed to read_molecule_file.

    An empty ``StringIO`` is passed with a ``.pdb`` filename, and the test
    expects the "does not seem to contain any" conformations error.
    """
    # No fixture file is needed here: the stream is deliberately empty, so
    # the previously unused path lookup and locals have been dropped.
    empty_stream = StringIO()

    errmsg = "The pdb file does not seem to contain any "
    with pytest.raises(ValueError, match=errmsg):
        run_propka_stream([], empty_stream, filename="test.pdb")
IAlibay marked this conversation as resolved.
Show resolved Hide resolved