Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allows for file-like objects to be passed to read_molecule_file #84

Merged
merged 12 commits into from
Jul 16, 2020
56 changes: 46 additions & 10 deletions propka/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ def open_file_for_reading(input_file):

Args:
input_file: path to file or file-like object. If file-like object,
then will attempt fseek(0).
then will attempt seek(0).
"""
try:
input_file.fseek(0)
input_file.seek(0)
return input_file
except AttributeError:
pass
Expand All @@ -35,25 +35,61 @@ def open_file_for_reading(input_file):
return file_


def read_molecule_file(input_file, mol_container):
"""Read input file (PDB or PROPKA) for a molecular container
def read_molecule_file(input_file, mol_container, filename=None):
orbeckst marked this conversation as resolved.
Show resolved Hide resolved
"""Read input file or stream (PDB or PROPKA) for a molecular container

Args
input_file: input file to read
Args:
input_file: input file or stream to read
mol_container: MolecularContainer object
Returns
filename (str): optional input filename when using a filestream

Returns:
updated MolecularContainer object
Raises

Raises:
ValueError if invalid input given

Examples:
There are two main cases for using ``read_molecule_file``. The first
(and most common) is to pass the input file (``input_file``) as a
string which gives the path of the molecule file to be read (here we
also pass a ``MolecularContainer`` object named ``mol_container``).

>>> read_molecule_file('test.pdb', mol_container)
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>

The other use case is when passing a file-like object, e.g. a
``StringIO`` instance, as the input file. In order to decide how
to process ``input_file``, ``read_molecule_file`` requires a file name.
Since file-like objects do not usually have an associated file name, we
must pass a value to the ``filename`` argument. This helps recognise
the file type (based on the extension being either `.pdb` or
`.propka_input`) and also associates that given ``filename`` with the
input MolecularContainer object.

>>> read_molecule_file(string_io_object, mol_container,
filename='test.pdb')
<propka.molecular_container.MolecularContainer at 0x7f6e0c8f2310>

"""
input_path = Path(input_file)
try:
input_path = Path(input_file)
except TypeError:
try:
input_path = Path(filename)
except TypeError:
errmsg = ("Path of provided input_file could not be determined "
"if passing a stream-like object, please provide an "
"appropriate string for the filename argument.")
raise TypeError(errmsg) from None

mol_container.name = input_path.stem
input_file_extension = input_path.suffix
if input_file_extension.lower() == '.pdb':
# input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations
conformations, conformation_names = read_pdb(
input_path, mol_container.version.parameters, mol_container)
input_file, mol_container.version.parameters, mol_container)
if len(conformations) == 0:
str_ = ('Error: The pdb file does not seems to contain any '
'molecular conformations')
Expand Down
1 change: 1 addition & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

66 changes: 66 additions & 0 deletions tests/test_basic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re
from pathlib import Path
from io import StringIO
speleo3 marked this conversation as resolved.
Show resolved Hide resolved
import pytest
from numpy.testing import assert_almost_equal
from propka.parameters import Parameters
Expand Down Expand Up @@ -152,3 +153,68 @@ def test_regression(pdb, options, tmp_path):
run_propka(options, pdb_path, tmp_path)
if ref_path is not None:
compare_output(pdb, tmp_path, ref_path)


def run_propka_stream(options, input_file, filename, tmp_path):
    """Run PROPKA software on a file-like input.

    Args:
        options:  list of PROPKA options (left unmodified)
        input_file:  file-like PDB object
        filename:  file name to associate with the file-like object; it is
            appended to the options and passed on to ``read_molecule_file``
        tmp_path:  path for working directory
    """
    # Build a new list instead of using ``+=`` so the caller's list (for
    # example a shared pytest parameter) is not mutated.
    args = loadOptions([*options, filename])
    # Capture the starting directory before entering ``try`` so the
    # ``finally`` clause can always restore it.
    cwd = Path.cwd()
    try:
        _LOGGER.warning(
            "Working in tmpdir {0:s} because of PROPKA file output; "
            "need to fix this.".format(str(tmp_path)))
        os.chdir(tmp_path)
        parameters = read_parameter_file(args.parameters, Parameters())
        molecule = MolecularContainer(parameters, args)
        molecule = read_molecule_file(input_file, molecule,
                                      filename=filename)
        molecule.calculate_pka()
        molecule.write_pka()
        if args.generate_propka_input:
            molecule.write_propka()
    finally:
        os.chdir(cwd)


@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('1HPX', [], id="1HPX: no options"),
    pytest.param('4DFR', [], id="4DFR: no options"),
    pytest.param('3SGB', [], id="3SGB: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")])
def test_filestream_regression(pdb, options, tmp_path):
    """Basic regression but using streams for the input PDB file.

    Args:
        pdb:  base name (without extension) of the PDB test case
        options:  list of PROPKA options for this test case
        tmp_path:  pytest-provided working directory

    Raises:
        FileNotFoundError:  if the PDB input file is missing
    """
    path_dict = get_test_dirs()
    ref_path = path_dict["results"] / ("{0:s}.dat".format(pdb))
    if ref_path.is_file():
        ref_path = ref_path.resolve()
    else:
        # A missing reference only disables the comparison step below.
        _LOGGER.warning("Missing results file for comparison: {0:s}".format(
            str(ref_path)))
        ref_path = None
    pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb))
    if not pdb_path.is_file():
        # Convert to str first: Path objects reject the ":s" format spec
        # with a TypeError, which would hide the intended error.
        errstr = "Missing PDB file: {0:s}".format(str(pdb_path))
        raise FileNotFoundError(errstr)
    pdb_path = pdb_path.resolve()
    # Load the whole file into an in-memory stream to exercise the
    # file-like code path of read_molecule_file.
    with open(pdb_path, 'r') as pdb_file:
        io_file = StringIO(pdb_file.read())
    tmp_path = Path(tmp_path).resolve()

    run_propka_stream(options, io_file, f"{pdb}.pdb", tmp_path)

    if ref_path is not None:
        compare_output(pdb, tmp_path, ref_path)
108 changes: 108 additions & 0 deletions tests/test_streamio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""Tests for PROPKA stream io"""
import logging
from pathlib import Path
from io import StringIO
import pytest
from propka.parameters import Parameters
from propka.molecular_container import MolecularContainer
from propka.input import read_parameter_file, read_molecule_file
from propka.lib import loadOptions

from .test_basic_regression import get_test_dirs, compare_output


_LOGGER = logging.getLogger(__name__)


def get_paths(pdb):
    """Return the resolved reference-result and PDB input paths for *pdb*.

    Args:
        pdb:  base name (without extension) of the test case

    Returns:
        tuple ``(ref_path, pdb_path)`` of resolved paths
    """
    test_dirs = get_test_dirs()
    reference = test_dirs["results"] / ("{0:s}.dat".format(pdb))
    structure = test_dirs["pdbs"] / ("{0:s}.pdb".format(pdb))
    resolved_reference = reference.resolve()
    resolved_structure = structure.resolve()
    return resolved_reference, resolved_structure


def run_propka_stream(options, input_file, filename):
    """Run PROPKA software on a file-like input.

    Output files are written relative to the current working directory.

    Args:
        options:  list of PROPKA options (left unmodified)
        input_file:  file-like PDB object
        filename:  filename for the file-like PDB object; it is appended to
            the options and passed on to ``read_molecule_file``
    """
    # Build a new list instead of using ``+=`` so the caller's list (for
    # example a shared pytest parameter) is not mutated.
    args = loadOptions([*options, filename])
    parameters = read_parameter_file(args.parameters, Parameters())
    molecule = MolecularContainer(parameters, args)
    molecule = read_molecule_file(input_file, molecule, filename)
    molecule.calculate_pka()
    molecule.write_pka()
    if args.generate_propka_input:
        molecule.write_propka()


@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"),
])
def test_textio_filestream(tmpdir, pdb, options):
    """Basic regression test using TextIO streams for the input PDB file.

    Args:
        tmpdir:  pytest-provided temporary directory used as working dir
        pdb:  base name (without extension) of the PDB test case
        options:  list of PROPKA options for this test case
    """
    # Get the relevant paths
    ref_path, pdb_path = get_paths(pdb)
    filename = f"{pdb}.pdb"

    # The context manager guarantees the file is closed even when the run or
    # the comparison fails.
    with open(pdb_path, 'r') as filestream:
        with tmpdir.as_cwd():
            run_propka_stream(options, filestream, filename)
            compare_output(pdb, Path.cwd(), ref_path)


@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet"),
])
def test_stringio_filestream(tmpdir, pdb, options):
    """Basic regression test using StringIO streams for the input PDB file.

    Args:
        tmpdir:  pytest-provided temporary directory used as working dir
        pdb:  base name (without extension) of the PDB test case
        options:  list of PROPKA options for this test case
    """
    # Get the relevant paths
    ref_path, pdb_path = get_paths(pdb)
    filename = f"{pdb}.pdb"

    with open(pdb_path, 'r') as pdb_file:
        contents = pdb_file.read()

    # StringIO is a context manager, so the stream is closed even when the
    # run or the comparison fails.
    with StringIO(contents) as filestream:
        with tmpdir.as_cwd():
            run_propka_stream(options, filestream, filename)
            compare_output(pdb, Path.cwd(), ref_path)


def test_typerror_nofilename(tmpdir):
    """Check that a TypeError is raised when a nameless file-like object is
    given to read_molecule_file without a filename."""
    case_name = "1FTJ-Chain-A"
    no_options = []
    expected_message = "Path of provided input_file could not be determined"

    # Only the input path is needed; the reference path is not used here.
    _, structure_path = get_paths(case_name)

    with open(structure_path, 'r') as pdb_file:
        filestream = StringIO(pdb_file.read())

    with tmpdir.as_cwd(), pytest.raises(TypeError, match=expected_message):
        # default value of filename is None
        run_propka_stream(no_options, filestream, filename=None)