diff --git a/phlex/app/phlex.cpp b/phlex/app/phlex.cpp index 5f9d52d30..40c8c3212 100644 --- a/phlex/app/phlex.cpp +++ b/phlex/app/phlex.cpp @@ -92,5 +92,13 @@ int main(int argc, char* argv[]) if (not vm["parallel"].defaulted()) { max_concurrency = vm["parallel"].as(); } - phlex::experimental::run(configurations, max_concurrency); + try { + phlex::experimental::run(configurations, max_concurrency); + } catch (std::exception const& e) { + std::cerr << e.what() << '\n'; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught.\n"; + return 1; + } } diff --git a/phlex/configuration.hpp b/phlex/configuration.hpp index 9882efe85..cd2804fa3 100644 --- a/phlex/configuration.hpp +++ b/phlex/configuration.hpp @@ -6,6 +6,7 @@ #include #include +#include #include namespace phlex::experimental { @@ -48,6 +49,25 @@ namespace phlex::experimental { std::vector keys() const; + // Internal function for prototype purposes; do not use as this will change. + std::pair prototype_internal_kind(std::string const& key) const + { + auto const& value = config_.at(key); // may throw + + auto k = value.kind(); + bool is_array = k == boost::json::kind::array; + + if (is_array) { + // The current configuration interface only supports homogenous containers, + // thus checking only the first element suffices. (This assumes arrays are + // not nested, which is fine for now.) + boost::json::array const& arr = value.as_array(); + k = arr.empty() ? boost::json::kind::null : arr[0].kind(); + } + + return std::make_pair(k, is_array); + } + private: boost::json::object config_; }; diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 6a545f932..02321dd54 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -1,3 +1,8 @@ +# Phlex provided core plugins + +# plugin for running Python algorithms in phlex +add_subdirectory(python) + add_library(layer_generator layer_generator.cpp) target_link_libraries(layer_generator PRIVATE phlex::core) diff --git a/plugins/python/CMakeLists.txt b/plugins/python/CMakeLists.txt new file mode 100644 index 000000000..5b32c49f6 --- /dev/null +++ b/plugins/python/CMakeLists.txt @@ -0,0 +1,94 @@ +find_package( + Python 3.12 + COMPONENTS Interpreter Development + QUIET + ) + +if(Python_FOUND) + + # Verify installation of necessary python modules for specific tests + + function(check_python_module_version MODULE_NAME MIN_VERSION OUT_VAR) + execute_process( + COMMAND + ${Python_EXECUTABLE} -c "import sys +try: + import ${MODULE_NAME} + from packaging.version import parse as parse_version + installed_version = getattr(${MODULE_NAME}, '__version__', None) + if parse_version(installed_version) >= parse_version('${MIN_VERSION}'): + sys.exit(0) + else: + sys.exit(2) # Version too low +except ImportError: + sys.exit(1)" + RESULT_VARIABLE _module_check_result + ) + + if(_module_check_result EQUAL 0) + set(${OUT_VAR} + TRUE + PARENT_SCOPE + ) + elseif(_module_check_result EQUAL 1) + set(${OUT_VAR} + FALSE + PARENT_SCOPE + ) # silent b/c common + elseif(_module_check_result EQUAL 2) + message( + WARNING + "Python module '${MODULE_NAME}' found but version too low (min required: ${MIN_VERSION})." + ) + set(${OUT_VAR} + FALSE + PARENT_SCOPE + ) + else() + message( + WARNING "Unknown error while checking Python module '${MODULE_NAME}'." + ) + set(${OUT_VAR} + FALSE + PARENT_SCOPE + ) + endif() + endfunction() + + check_python_module_version("numpy" "2.0.0" HAS_NUMPY) + + # Phlex module to run Python algorithms + add_library( + pymodule MODULE src/pymodule.cpp src/modulewrap.cpp src/configwrap.cpp + src/lifelinewrap.cpp src/errorwrap.cpp + ) + include_directories(pymodule, ${Python_INCLUDE_DIRS}) + target_link_libraries( + pymodule + PRIVATE phlex::module ${Python_LIBRARIES} + PUBLIC Python::Python + ) + + # numpy support if installed + if(HAS_NUMPY) + + # locate numpy's header directory + execute_process( + COMMAND "${Python_EXECUTABLE}" -c + "import numpy; print(numpy.get_include(), end='')" + RESULT_VARIABLE NUMPY_RESULT + OUTPUT_VARIABLE NUMPY_INCLUDE_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if(NUMPY_RESULT EQUAL 0) + include_directories(pymodule PRIVATE ${NUMPY_INCLUDE_DIR}) + target_compile_definitions( + pymodule PRIVATE PHLEX_HAVE_NUMPY=1 + NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION + ) + endif() + + endif() + +endif() # Python available diff --git a/plugins/python/src/configwrap.cpp b/plugins/python/src/configwrap.cpp new file mode 100644 index 000000000..fb4852a07 --- /dev/null +++ b/plugins/python/src/configwrap.cpp @@ -0,0 +1,213 @@ +#include +#include + +#include "phlex/configuration.hpp" +#include "wrap.hpp" + +using namespace phlex::experimental; + +// Create a dict-like access to the configuration from Python. +// clang-format off +struct phlex::experimental::py_config_map { + PyObject_HEAD + phlex::experimental::configuration const* ph_config; + PyObject* ph_config_cache; +}; +// clang-format on + +PyObject* phlex::experimental::wrap_configuration(configuration const* config) +{ + if (!config) { + PyErr_SetString(PyExc_ValueError, "provided configuration is null"); + return nullptr; + } + + py_config_map* pyconfig = + (py_config_map*)PhlexConfig_Type.tp_new(&PhlexConfig_Type, nullptr, nullptr); + + pyconfig->ph_config = config; + + return (PyObject*)pyconfig; +} + +//= CPyCppyy low level view construction/destruction ========================= +static py_config_map* pcm_new(PyTypeObject* subtype, PyObject*, PyObject*) +{ + py_config_map* pcm = (py_config_map*)subtype->tp_alloc(subtype, 0); + if (!pcm) + return nullptr; + + pcm->ph_config_cache = PyDict_New(); + + return pcm; +} + +static void pcm_dealloc(py_config_map* pcm) +{ + Py_DECREF(pcm->ph_config_cache); + Py_TYPE(pcm)->tp_free((PyObject*)pcm); +} + +static PyObject* pcm_subscript(py_config_map* pycmap, PyObject* pykey) +{ + // Retrieve a named configuration setting. + // + // Configuration should have a single in-memory representation, which is why + // the current approach retrieves it from the equivalent C++ object, ie. after + // the JSON input has been parsed, even as there are Python JSON parsers. + // + // pykey: the lookup key to retrieve the configuration value + + if (!PyUnicode_Check(pykey)) { + PyErr_SetString(PyExc_TypeError, "__getitem__ expects a string key"); + return nullptr; + } + + // cached lookup + PyObject* pyvalue = PyDict_GetItem(pycmap->ph_config_cache, pykey); + if (pyvalue) { + Py_INCREF(pyvalue); + return pyvalue; + } + PyErr_Clear(); + + std::string ckey = PyUnicode_AsUTF8(pykey); + + try { + auto k = pycmap->ph_config->prototype_internal_kind(ckey); + if (k.second /* is array */) { + if (k.first == boost::json::kind::bool_) { + auto const& cvalue = pycmap->ph_config->get>(ckey); + pyvalue = PyTuple_New(cvalue.size()); + for (Py_ssize_t i = 0; i < (Py_ssize_t)cvalue.size(); ++i) { + PyObject* item = PyLong_FromLong((long)cvalue[i]); + PyTuple_SetItem(pyvalue, i, item); + } + } else if (k.first == boost::json::kind::int64) { + auto const& cvalue = pycmap->ph_config->get>(ckey); + pyvalue = PyTuple_New(cvalue.size()); + for (Py_ssize_t i = 0; i < (Py_ssize_t)cvalue.size(); ++i) { + PyObject* item = PyLong_FromLong(cvalue[i]); + PyTuple_SetItem(pyvalue, i, item); + } + } else if (k.first == boost::json::kind::uint64) { + auto const& cvalue = pycmap->ph_config->get>(ckey); + pyvalue = PyTuple_New(cvalue.size()); + for (Py_ssize_t i = 0; i < (Py_ssize_t)cvalue.size(); ++i) { + PyObject* item = PyLong_FromUnsignedLong(cvalue[i]); + PyTuple_SetItem(pyvalue, i, item); + } + } else if (k.first == boost::json::kind::double_) { + auto const& cvalue = pycmap->ph_config->get>(ckey); + pyvalue = PyTuple_New(cvalue.size()); + for (Py_ssize_t i = 0; i < (Py_ssize_t)cvalue.size(); ++i) { + PyObject* item = PyFloat_FromDouble(cvalue[i]); + PyTuple_SetItem(pyvalue, i, item); + } + } else if (k.first == boost::json::kind::string) { + auto const& cvalue = pycmap->ph_config->get>(ckey); + pyvalue = PyTuple_New(cvalue.size()); + for (Py_ssize_t i = 0; i < (Py_ssize_t)cvalue.size(); ++i) { + PyObject* item = PyUnicode_FromStringAndSize(cvalue[i].c_str(), cvalue[i].size()); + PyTuple_SetItem(pyvalue, i, item); + } + } + } else { + if (k.first == boost::json::kind::bool_) { + auto cvalue = pycmap->ph_config->get(ckey); + pyvalue = PyBool_FromLong((long)cvalue); + } else if (k.first == boost::json::kind::int64) { + auto cvalue = pycmap->ph_config->get(ckey); + pyvalue = PyLong_FromLong(cvalue); + } else if (k.first == boost::json::kind::uint64) { + auto cvalue = pycmap->ph_config->get(ckey); + pyvalue = PyLong_FromUnsignedLong(cvalue); + } else if (k.first == boost::json::kind::double_) { + auto cvalue = pycmap->ph_config->get(ckey); + pyvalue = PyFloat_FromDouble(cvalue); + } else if (k.first == boost::json::kind::string) { + auto const& cvalue = pycmap->ph_config->get(ckey); + pyvalue = PyUnicode_FromStringAndSize(cvalue.c_str(), cvalue.size()); + } + } + } catch (std::runtime_error const&) { + PyErr_Format(PyExc_TypeError, "property \"%s\" does not exist", ckey.c_str()); + } + + // cache if found + if (pyvalue) { + PyDict_SetItem(pycmap->ph_config_cache, pykey, pyvalue); + } + + return pyvalue; +} + +static PyMappingMethods pcm_as_mapping = {nullptr, (binaryfunc)pcm_subscript, nullptr}; + +// clang-format off +PyTypeObject phlex::experimental::PhlexConfig_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + (char*) "pyphlex.configuration", // tp_name + sizeof(py_config_map), // tp_basicsize + 0, // tp_itemsize + (destructor)pcm_dealloc, // tp_dealloc + 0, // tp_vectorcall_offset / tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_as_async / tp_compare + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + &pcm_as_mapping, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + (char*)"phlex configuration object-as-dictionary", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter + 0, // tp_iternext + 0, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + offsetof(py_config_map, ph_config_cache), // tp_dictoffset + 0, // tp_init + 0, // tp_alloc + (newfunc)pcm_new, // tp_new + 0, // tp_free + 0, // tp_is_gc + 0, // tp_bases + 0, // tp_mro + 0, // tp_cache + 0, // tp_subclasses + 0 // tp_weaklist +#if PY_VERSION_HEX >= 0x02030000 + , 0 // tp_del +#endif +#if PY_VERSION_HEX >= 0x02060000 + , 0 // tp_version_tag +#endif +#if PY_VERSION_HEX >= 0x03040000 + , 0 // tp_finalize +#endif +#if PY_VERSION_HEX >= 0x03080000 + , 0 // tp_vectorcall +#endif +#if PY_VERSION_HEX >= 0x030c0000 + , 0 // tp_watched +#endif +#if PY_VERSION_HEX >= 0x030d0000 + , 0 // tp_versions_used +#endif +}; +// clang-format on diff --git a/plugins/python/src/errorwrap.cpp b/plugins/python/src/errorwrap.cpp new file mode 100644 index 000000000..c8a073ee4 --- /dev/null +++ b/plugins/python/src/errorwrap.cpp @@ -0,0 +1,94 @@ +#include "wrap.hpp" + +#include + +using namespace phlex::experimental; + +static bool format_traceback(std::string& msg, +#if PY_VERSION_HEX < 0x30c000000 + PyObject* type, + PyObject* value, + PyObject* traceback) +#else + PyObject* exception) +#endif +{ + PyObject* tbmod = PyImport_ImportModule("traceback"); + PyObject* format_exception = PyObject_GetAttrString(tbmod, "format_exception"); + Py_DECREF(tbmod); + + PyObject* formatted_tb = PyObject_CallFunctionObjArgs( +#if PY_VERSION_HEX < 0x30c000000 + format_exception, type, value, traceback, nullptr); +#else + format_exception, exception, nullptr); +#endif + Py_DECREF(format_exception); + + if (!formatted_tb) { + PyErr_Clear(); + return false; + } + + PyObject* py_msg_empty = PyUnicode_FromString(""); + PyObject* py_msg = PyUnicode_Join(py_msg_empty, formatted_tb); + Py_DECREF(py_msg_empty); + Py_DECREF(formatted_tb); + + if (!py_msg) { + PyErr_Clear(); + return false; + } + + char const* c_msg = PyUnicode_AsUTF8(py_msg); + if (c_msg) { + msg = c_msg; + Py_DECREF(py_msg); + return true; + } + + PyErr_Clear(); + Py_DECREF(py_msg); + return false; +} + +bool phlex::experimental::msg_from_py_error(std::string& msg, bool check_error) +{ + PyGILRAII g; + + if (check_error) { + if (!PyErr_Occurred()) + return false; + } + +#if PY_VERSION_HEX < 0x30c000000 + PyObject *type = nullptr, *value = nullptr, *traceback = nullptr; + PyErr_Fetch(&type, &value, &traceback); + if (value) { + bool tb_ok = format_traceback(msg, type, value, traceback); + if (!tb_ok) { + PyObject* pymsg = PyObject_Str(value); + msg = PyUnicode_AsUTF8(pymsg); + Py_DECREF(pymsg); + } + } else { + msg = "unknown Python error occurred"; + } + Py_XDECREF(traceback); + Py_XDECREF(value); + Py_XDECREF(type); +#else + PyObject* exc = PyErr_GetRaisedException(); + if (exc) { + bool tb_ok = format_traceback(msg, exc); + if (!tb_ok) { + PyObject* pymsg = PyObject_Str(exc); + msg = PyUnicode_AsUTF8(pymsg); + Py_DECREF(pymsg); + } + Py_DECREF(exc); + } +#endif + + return true; +} diff --git a/plugins/python/src/lifelinewrap.cpp b/plugins/python/src/lifelinewrap.cpp new file mode 100644 index 000000000..0f81e6bb1 --- /dev/null +++ b/plugins/python/src/lifelinewrap.cpp @@ -0,0 +1,105 @@ +#include +#include + +#include "wrap.hpp" + +using namespace phlex::experimental; + +static py_lifeline_t* ll_new(PyTypeObject* pytype, PyObject*, PyObject*) +{ + py_lifeline_t* pyobj = (py_lifeline_t*)pytype->tp_alloc(pytype, 0); + if (!pyobj) + PyErr_Print(); + pyobj->m_view = nullptr; + new (&pyobj->m_source) std::shared_ptr{}; + + return pyobj; +} + +static int ll_traverse(py_lifeline_t* pyobj, visitproc visit, void* args) +{ + if (pyobj->m_view) + visit(pyobj->m_view, args); + return 0; +} + +static int ll_clear(py_lifeline_t* pyobj) +{ + Py_CLEAR(pyobj->m_view); + return 0; +} + +static void ll_dealloc(py_lifeline_t* pyobj) +{ + Py_CLEAR(pyobj->m_view); + typedef std::shared_ptr generic_shared_t; + pyobj->m_source.~generic_shared_t(); +} + +// clang-format off +PyTypeObject phlex::experimental::PhlexLifeline_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + (char*) "pyphlex.lifeline", // tp_name + sizeof(py_lifeline_t), // tp_basicsize + 0, // tp_itemsize + (destructor)ll_dealloc, // tp_dealloc + 0, // tp_vectorcall_offset / tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_as_async / tp_compare + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, // tp_flags + (char*)"internal", // tp_doc + (traverseproc)ll_traverse, // tp_traverse + (inquiry)ll_clear, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter + 0, // tp_iternext + 0, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + 0, // tp_alloc + (newfunc)ll_new, // tp_new + 0, // tp_free + 0, // tp_is_gc + 0, // tp_bases + 0, // tp_mro + 0, // tp_cache + 0, // tp_subclasses + 0 // tp_weaklist +#if PY_VERSION_HEX >= 0x02030000 + , 0 // tp_del +#endif +#if PY_VERSION_HEX >= 0x02060000 + , 0 // tp_version_tag +#endif +#if PY_VERSION_HEX >= 0x03040000 + , 0 // tp_finalize +#endif +#if PY_VERSION_HEX >= 0x03080000 + , 0 // tp_vectorcall +#endif +#if PY_VERSION_HEX >= 0x030c0000 + , 0 // tp_watched +#endif +#if PY_VERSION_HEX >= 0x030d0000 + , 0 // tp_versions_used +#endif +}; +// clang-format on diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp new file mode 100644 index 000000000..65109e71e --- /dev/null +++ b/plugins/python/src/modulewrap.cpp @@ -0,0 +1,855 @@ +#include "phlex/module.hpp" +#include "wrap.hpp" + +#include +#include +#include +#include +#include + +#ifdef PHLEX_HAVE_NUMPY +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL phlex_ARRAY_API +#include +#endif + +using namespace phlex::experimental; + +// TODO: the layer is currently hard-wired and should come from the product +// specification instead, but that doesn't exist in Python yet. +static std::string const LAYER = "job"; + +// Simple phlex module wrapper +// clang-format off +struct phlex::experimental::py_phlex_module { + PyObject_HEAD + phlex_module_t* ph_module; +}; +// clang-format on + +PyObject* phlex::experimental::wrap_module(phlex_module_t* module_) +{ + if (!module_) { + PyErr_SetString(PyExc_ValueError, "provided module is null"); + return nullptr; + } + + py_phlex_module* pymod = PyObject_New(py_phlex_module, &PhlexModule_Type); + pymod->ph_module = module_; + + return (PyObject*)pymod; +} + +namespace { + + // TODO: wishing for std::views::join_with() in C++23, but until then: + static std::string stringify(std::vector& v) + { + std::ostringstream oss; + if (!v.empty()) { + oss << v.front(); + for (std::size_t i = 1; i < v.size(); ++i) { + oss << ", " << v[i]; + } + } + return oss.str(); + } + + static inline PyObject* lifeline_transform(intptr_t arg) + { + if (Py_TYPE((PyObject*)arg) == &PhlexLifeline_Type) { + return ((py_lifeline_t*)arg)->m_view; + } + return (PyObject*)arg; + } + + // callable object managing the callback + template + struct py_callback { + PyObject const* m_callable; // owned + + py_callback(PyObject const* callable) + { + Py_INCREF(callable); + m_callable = callable; + } + py_callback(py_callback const& pc) + { + Py_INCREF(pc.m_callable); + m_callable = pc.m_callable; + } + py_callback& operator=(py_callback const& pc) + { + if (this != &pc) { + Py_INCREF(pc.m_callable); + m_callable = pc.m_callable; + } + } + ~py_callback() { Py_DECREF(m_callable); } + + template + intptr_t call(Args... args) + { + static_assert(sizeof...(Args) == N, "Argument count mismatch"); + + PyGILRAII gil; + + PyObject* result = + PyObject_CallFunctionObjArgs((PyObject*)m_callable, lifeline_transform(args)..., nullptr); + + std::string error_msg; + if (!result) { + if (!msg_from_py_error(error_msg)) + error_msg = "Unknown python error"; + } + + decref_all(args...); + + if (!error_msg.empty()) + throw std::runtime_error(error_msg.c_str()); + + return (intptr_t)result; + } + + template + void callv(Args... args) + { + static_assert(sizeof...(Args) == N, "Argument count mismatch"); + + PyGILRAII gil; + + PyObject* result = + PyObject_CallFunctionObjArgs((PyObject*)m_callable, (PyObject*)args..., nullptr); + + std::string error_msg; + if (!result) { + if (!msg_from_py_error(error_msg)) + error_msg = "Unknown python error"; + } else + Py_DECREF(result); + + decref_all(args...); + + if (!error_msg.empty()) + throw std::runtime_error(error_msg.c_str()); + } + + private: + template + void decref_all(Args... args) + { + // helper to decrement reference counts of N arguments + (Py_DECREF((PyObject*)args), ...); + } + }; + + // use explicit instatiations to ensure that the function signature can + // be derived by the graph builder + struct py_callback_1 : public py_callback<1> { + intptr_t operator()(intptr_t arg0) { return call(arg0); } + }; + + struct py_callback_2 : public py_callback<2> { + intptr_t operator()(intptr_t arg0, intptr_t arg1) { return call(arg0, arg1); } + }; + + struct py_callback_3 : public py_callback<3> { + intptr_t operator()(intptr_t arg0, intptr_t arg1, intptr_t arg2) + { + return call(arg0, arg1, arg2); + } + }; + + struct py_callback_1v : public py_callback<1> { + void operator()(intptr_t arg0) { callv(arg0); } + }; + + struct py_callback_2v : public py_callback<2> { + void operator()(intptr_t arg0, intptr_t arg1) { callv(arg0, arg1); } + }; + + struct py_callback_3v : public py_callback<3> { + void operator()(intptr_t arg0, intptr_t arg1, intptr_t arg2) { callv(arg0, arg1, arg2); } + }; + + static std::vector cseq(PyObject* coll) + { + size_t len = coll ? (size_t)PySequence_Size(coll) : 0; + std::vector cargs{len}; + + for (size_t i = 0; i < len; ++i) { + PyObject* item = PySequence_GetItem(coll, i); + if (item) { + char const* p = PyUnicode_AsUTF8(item); + if (p) { + Py_ssize_t sz = PyUnicode_GetLength(item); + cargs[i].assign(p, (std::string::size_type)sz); + } + Py_DECREF(item); + + if (!p) { + PyErr_Format(PyExc_TypeError, "could not convert item %d to string", (int)i); + break; + } + } else + break; // Python error already set + } + + return cargs; + } + +} // unnamed namespace + +namespace { + + static std::string annotation_as_text(PyObject* pyobj) + { + std::string ann; + if (!PyUnicode_Check(pyobj)) { + PyObject* pystr = PyObject_GetAttrString(pyobj, "__name__"); // eg. for classes + if (!pystr) { + PyErr_Clear(); + pystr = PyObject_Str(pyobj); + } + + char const* cstr = PyUnicode_AsUTF8(pystr); + if (cstr) + ann = cstr; + Py_DECREF(pystr); + + // for numpy typing, there's no useful way of figuring out the type from the + // name of the type, only from its string representation, so fall through and + // let this method return str() + if (ann != "ndarray") + return ann; + + // start over for numpy type using result from str() + pystr = PyObject_Str(pyobj); + cstr = PyUnicode_AsUTF8(pystr); + if (cstr) // if failed, ann will remain "ndarray" + ann = cstr; + Py_DECREF(pystr); + return ann; + } + + // unicode object, i.e. string name of the type + char const* cstr = PyUnicode_AsUTF8(pyobj); + if (cstr) + ann = cstr; + else + PyErr_Clear(); + + return ann; + } + + // converters of builtin types; TODO: this is a basic subset only, b/c either + // these will be generated from the IDL, or they should be picked up from an + // existing place such as cppyy + + static bool pylong_as_bool(PyObject* pyobject) + { + // range-checking python integer to C++ bool conversion + long l = PyLong_AsLong(pyobject); + // fail to pass float -> bool; the problem is rounding (0.1 -> 0 -> False) + if (!(l == 0 || l == 1) || PyFloat_Check(pyobject)) { + PyErr_SetString(PyExc_ValueError, "boolean value should be bool, or integer 1 or 0"); + return (bool)-1; + } + return (bool)l; + } + + static long pylong_as_strictlong(PyObject* pyobject) + { + // convert to C++ long, don't allow truncation + if (!PyLong_Check(pyobject)) { + PyErr_SetString(PyExc_TypeError, "int/long conversion expects an integer object"); + return (long)-1; + } + return (long)PyLong_AsLong(pyobject); // already does long range check + } + + static unsigned long pylong_or_int_as_ulong(PyObject* pyobject) + { + // convert to C++ unsigned long, with bounds checking, allow int -> ulong. + if (PyFloat_Check(pyobject)) { + PyErr_SetString(PyExc_TypeError, "can\'t convert float to unsigned long"); + return (unsigned long)-1; + } + + unsigned long ul = PyLong_AsUnsignedLong(pyobject); + if (ul == (unsigned long)-1 && PyErr_Occurred() && PyLong_Check(pyobject)) { + PyErr_Clear(); + long i = PyLong_AS_LONG(pyobject); + if (0 <= i) { + ul = (unsigned long)i; + } else { + PyErr_SetString(PyExc_ValueError, "can\'t convert negative value to unsigned long"); + return (unsigned long)-1; + } + } + + return ul; + } + +#define BASIC_CONVERTER(name, cpptype, topy, frompy) \ + static intptr_t name##_to_py(cpptype a) \ + { \ + PyGILRAII gil; \ + return (intptr_t)topy(a); \ + } \ + \ + static cpptype py_to_##name(intptr_t pyobj) \ + { \ + PyGILRAII gil; \ + cpptype i = (cpptype)frompy((PyObject*)pyobj); \ + Py_DECREF((PyObject*)pyobj); \ + return i; \ + } + + BASIC_CONVERTER(bool, bool, PyBool_FromLong, pylong_as_bool) + BASIC_CONVERTER(int, int, PyLong_FromLong, PyLong_AsLong) + BASIC_CONVERTER(uint, unsigned int, PyLong_FromLong, pylong_or_int_as_ulong) + BASIC_CONVERTER(long, long, PyLong_FromLong, pylong_as_strictlong) + BASIC_CONVERTER(ulong, unsigned long, PyLong_FromUnsignedLong, pylong_or_int_as_ulong) + BASIC_CONVERTER(float, float, PyFloat_FromDouble, PyFloat_AsDouble) + BASIC_CONVERTER(double, double, PyFloat_FromDouble, PyFloat_AsDouble) + +#ifdef PHLEX_HAVE_NUMPY +#define VECTOR_CONVERTER(name, cpptype, nptype) \ + static intptr_t name##_to_py(std::shared_ptr> const& v) \ + { \ + PyGILRAII gil; \ + \ + /* use a numpy view with the shared pointer tied up in a lifeline object (note: this */ \ + /* is just a demonstrator; alternatives are still being considered) */ \ + npy_intp dims[] = {static_cast(v->size())}; \ + \ + PyObject* np_view = PyArray_SimpleNewFromData(1, /* 1-D array */ \ + dims, /* dimension sizes */ \ + nptype, /* numpy C type */ \ + v->data() /* raw buffer */ \ + ); \ + \ + if (!np_view) \ + return (intptr_t)nullptr; \ + \ + /* make the data read-only by not making it writable */ \ + PyArray_CLEARFLAGS((PyArrayObject*)np_view, NPY_ARRAY_WRITEABLE); \ + \ + /* create a lifeline object to tie this array and the original handle together; note */ \ + /* that the callback code needs to pick the data member out of the lifeline object, */ \ + /* when passing it to the registered Python function */ \ + py_lifeline_t* pyll = \ + (py_lifeline_t*)PhlexLifeline_Type.tp_new(&PhlexLifeline_Type, nullptr, nullptr); \ + pyll->m_view = np_view; /* steals reference */ \ + pyll->m_source = v; \ + \ + return (intptr_t)pyll; \ + } + + VECTOR_CONVERTER(vint, int, NPY_INT) + VECTOR_CONVERTER(vuint, unsigned int, NPY_UINT) + VECTOR_CONVERTER(vlong, long, NPY_LONG) + VECTOR_CONVERTER(vulong, unsigned long, NPY_ULONG) + VECTOR_CONVERTER(vfloat, float, NPY_FLOAT) + VECTOR_CONVERTER(vdouble, double, NPY_DOUBLE) + +#define NUMPY_ARRAY_CONVERTER(name, cpptype, nptype) \ + static std::shared_ptr> py_to_##name(intptr_t pyobj) \ + { \ + PyGILRAII gil; \ + \ + auto vec = std::make_shared>(); \ + \ + /* TODO: because of unresolved ownership issues, copy the full array contents */ \ + if (!pyobj || !PyArray_Check((PyObject*)pyobj)) { \ + PyErr_Clear(); /* how to report an error? */ \ + Py_DECREF((PyObject*)pyobj); \ + return vec; \ + } \ + \ + PyArrayObject* arr = (PyArrayObject*)pyobj; \ + \ + /* TODO: flattening the array here seems to be the only workable solution */ \ + npy_intp* dims = PyArray_DIMS(arr); \ + int nd = PyArray_NDIM(arr); \ + size_t total = 1; \ + for (int i = 0; i < nd; ++i) \ + total *= static_cast(dims[i]); \ + \ + /* copy the array info; note that this assumes C continuity */ \ + cpptype* raw = static_cast(PyArray_DATA(arr)); \ + vec->reserve(total); \ + vec->insert(vec->end(), raw, raw + total); \ + \ + Py_DECREF((PyObject*)pyobj); \ + return vec; \ + } + + NUMPY_ARRAY_CONVERTER(vint, int, NPY_INT) + NUMPY_ARRAY_CONVERTER(vuint, unsigned int, NPY_UINT) + NUMPY_ARRAY_CONVERTER(vlong, long, NPY_LONG) + NUMPY_ARRAY_CONVERTER(vulong, unsigned long, NPY_ULONG) + NUMPY_ARRAY_CONVERTER(vfloat, float, NPY_FLOAT) + NUMPY_ARRAY_CONVERTER(vdouble, double, NPY_DOUBLE) +#endif + +} // unnamed namespace + +#define INSERT_INPUT_CONVERTER(name, alg, inp) \ + mod->ph_module->transform("py" #name "_" + inp + "_" + alg, name##_to_py, concurrency::serial) \ + .input_family(product_query{product_specification::create(inp), LAYER}) \ + .output_products(alg + "_" + inp + "py") + +#define INSERT_OUTPUT_CONVERTER(name, alg, outp) \ + mod->ph_module->transform(#name "py_" + outp + "_" + alg, py_to_##name, concurrency::serial) \ + .input_family(product_query{product_specification::create("py" + outp + "_" + alg), LAYER}) \ + .output_products(outp) + +static PyObject* parse_args(PyObject* args, + PyObject* kwds, + std::string& functor_name, + std::vector& input_labels, + std::vector& input_types, + std::vector& output_labels, + std::vector& output_types) +{ + // Helper function to extract the common names and identifiers needed to insert + // any node. (The observer does not require outputs, but they still need to be + // retrieved, not ignored, to issue an error message if an output is provided.) + + static char const* kwnames[] = { + "callable", "input_family", "output_products", "concurrency", "name", nullptr}; + PyObject *callable = 0, *input = 0, *output = 0, *concurrency = 0, *pyname = 0; + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "OO|OOO", (char**)kwnames, &callable, &input, &output, &concurrency, &pyname)) { + // error already set by argument parser + return nullptr; + } + + if (concurrency && concurrency != Py_None) { + PyErr_SetString(PyExc_TypeError, "only serial concurrency is supported"); + return nullptr; + } + + if (!callable || !PyCallable_Check(callable)) { + PyErr_SetString(PyExc_TypeError, "provided algorithm is not callable"); + return nullptr; + } + + // retrieve function name and argument types + if (!pyname) { + pyname = PyObject_GetAttrString(callable, "__name__"); + if (!pyname) { + // AttributeError already set + return nullptr; + } + } else + Py_INCREF(pyname); + functor_name = PyUnicode_AsUTF8(pyname); + Py_DECREF(pyname); + + if (!input) { + PyErr_SetString(PyExc_TypeError, "an input is required"); + return nullptr; + } + + if (!PySequence_Check(input) || (output && !PySequence_Check(output))) { + PyErr_SetString(PyExc_TypeError, "input and output need to be sequences"); + return nullptr; + } + + // convert input and output declarations, to be able to pass them to Phlex + input_labels = cseq(input); + output_labels = cseq(output); + if (output_labels.size() > 1) { + PyErr_SetString(PyExc_TypeError, "only a single output supported"); + return nullptr; + } + + // retrieve C++ (matching) types from annotations + input_types.reserve(input_labels.size()); + + PyObject* sann = PyUnicode_FromString("__annotations__"); + PyObject* annot = PyObject_GetAttr(callable, sann); + if (!annot) { + // the callable may be an instance with a __call__ method + PyErr_Clear(); + PyObject* callm = PyObject_GetAttrString(callable, "__call__"); + if (callm) { + annot = PyObject_GetAttr(callm, sann); + Py_DECREF(callm); + } + } + Py_DECREF(sann); + + if (annot && PyDict_Check(annot) && PyDict_Size(annot)) { + PyObject* ret = PyDict_GetItemString(annot, "return"); + if (ret) + output_types.push_back(annotation_as_text(ret)); + + // dictionary is ordered with return last if provide (note: the keys here + // could be used as input labels, instead of the ones from the configuration, + // but that is probably not practical in actual use, so they are ignored) + PyObject* values = PyDict_Values(annot); + for (Py_ssize_t i = 0; i < (PyList_GET_SIZE(values) - (ret ? 1 : 0)); ++i) { + PyObject* item = PyList_GET_ITEM(values, i); + input_types.push_back(annotation_as_text(item)); + } + Py_DECREF(values); + } + Py_XDECREF(annot); + + // ignore None as Python's conventional "void" return, which is meaningless in C++ + if (output_types.size() == 1 && output_types[0] == "None") + output_types.clear(); + + // if annotations were correct (and correctly parsed), there should be as many + // input types as input labels + if (input_types.size() != input_labels.size()) { + PyErr_Format(PyExc_TypeError, + "number of inputs (%d; %s) does not match number of annotation types (%d; %s)", + input_labels.size(), + stringify(input_labels).c_str(), + input_types.size(), + stringify(input_types).c_str()); + return nullptr; + } + + // no common errors detected; actual registration may have more checks + Py_INCREF(callable); + return callable; +} + +static bool insert_input_converters(py_phlex_module* mod, + std::string const& cname, // TODO: shared_ptr + std::vector const& input_labels, + std::vector const& input_types) +{ + // insert input converter nodes into the graph + for (size_t i = 0; i < (size_t)input_labels.size(); ++i) { + // TODO: this seems overly verbose and inefficient, but the function needs + // to be properly types, so every option is made explicit + auto const& inp = input_labels[i]; + auto const& inp_type = input_types[i]; + + if (inp_type == "bool") + INSERT_INPUT_CONVERTER(bool, cname, inp); + else if (inp_type == "int") + INSERT_INPUT_CONVERTER(int, cname, inp); + else if (inp_type == "unsigned int") + INSERT_INPUT_CONVERTER(uint, cname, inp); + else if (inp_type == "long") + INSERT_INPUT_CONVERTER(long, cname, inp); + else if (inp_type == "unsigned long") + INSERT_INPUT_CONVERTER(ulong, cname, inp); + else if (inp_type == "float") + INSERT_INPUT_CONVERTER(float, cname, inp); + else if (inp_type == "double") + INSERT_INPUT_CONVERTER(double, cname, inp); +#ifdef PHLEX_HAVE_NUMPY + else if (inp_type.compare(0, 13, "numpy.ndarray") == 0) { + // TODO: these are hard-coded std::vector <-> numpy array mappings, which is + // way too simplistic for real use. It only exists for demonstration purposes, + // until we have an IDL + auto pos = inp_type.rfind("numpy.dtype"); + if (pos == std::string::npos) { + PyErr_Format( + PyExc_TypeError, "could not determine dtype of input type \"%s\"", inp_type.c_str()); + return false; + } + + pos += 18; + + std::string py_out = cname + "_" + inp + "py"; + if (inp_type.compare(pos, std::string::npos, "int32]]") == 0) { + mod->ph_module->transform("pyvint_" + inp + "_" + cname, vint_to_py, concurrency::serial) + .input_family(product_query{product_specification::create(inp), LAYER}) + .output_products(py_out); + } else if (inp_type.compare(pos, std::string::npos, "uint32]]") == 0) { + mod->ph_module->transform("pyvuint_" + inp + "_" + cname, vuint_to_py, concurrency::serial) + .input_family(product_query{product_specification::create(inp), LAYER}) + .output_products(py_out); + } else if (inp_type.compare(pos, std::string::npos, "int64]]") == 0) { // need not be true + mod->ph_module->transform("pyvlong_" + inp + "_" + cname, vlong_to_py, concurrency::serial) + .input_family(product_query{product_specification::create(inp), LAYER}) + .output_products(py_out); + } else if (inp_type.compare(pos, std::string::npos, "uint64]]") == 0) { // id. + mod->ph_module + ->transform("pyvulong_" + inp + "_" + cname, vulong_to_py, concurrency::serial) + .input_family(product_query{product_specification::create(inp), LAYER}) + .output_products(py_out); + } else if (inp_type.compare(pos, std::string::npos, "float32]]") == 0) { + mod->ph_module + ->transform("pyvfloat_" + inp + "_" + cname, vfloat_to_py, concurrency::serial) + .input_family(product_query{product_specification::create(inp), LAYER}) + .output_products(py_out); + } else if (inp_type.compare(pos, std::string::npos, "double64]]") == 0) { + mod->ph_module + ->transform("pyvdouble_" + inp + "_" + cname, vdouble_to_py, concurrency::serial) + .input_family(product_query{product_specification::create(inp), LAYER}) + .output_products(py_out); + } else { + PyErr_Format(PyExc_TypeError, "unsupported array input type \"%s\"", inp_type.c_str()); + return false; + } + } +#endif + else { + PyErr_Format(PyExc_TypeError, "unsupported input type \"%s\"", inp_type.c_str()); + return false; + } + } + + return true; +} + +static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kwds) +{ + // Register a python algorithm by adding the necessary intermediate converter + // nodes going from C++ to PyObject* and back. + + std::string cname; + std::vector input_labels, input_types, output_labels, output_types; + PyObject* callable = + parse_args(args, kwds, cname, input_labels, input_types, output_labels, output_types); + if (!callable) + return nullptr; // error already set + + if (output_types.empty()) { + PyErr_Format(PyExc_TypeError, "a transform should have an output type"); + return nullptr; + } + + // TODO: only support single output type for now, as there has to be a mapping + // onto a std::tuple otherwise, which is a typed object, thus complicating the + // template instantiation + std::string output = output_labels[0]; + std::string output_type = output_types[0]; + + if (!insert_input_converters(mod, cname, input_labels, input_types)) + return nullptr; // error already set + + // register Python transform + std::string py_out = "py" + output + "_" + cname; + if (input_labels.size() == 1) { + auto* pyc = new py_callback_1{callable}; // TODO: leaks, but has program lifetime + mod->ph_module->transform(cname, *pyc, concurrency::serial) + .input_family( + product_query{product_specification::create(cname + "_" + input_labels[0] + "py"), LAYER}) + .output_products(py_out); + } else if (input_labels.size() == 2) { + auto* pyc = new py_callback_2{callable}; + mod->ph_module->transform(cname, *pyc, concurrency::serial) + .input_family( + product_query{product_specification::create(cname + "_" + input_labels[0] + "py"), LAYER}, + product_query{product_specification::create(cname + "_" + input_labels[1] + "py"), LAYER}) + .output_products(py_out); + } else if (input_labels.size() == 3) { + auto* pyc = new py_callback_3{callable}; + mod->ph_module->transform(cname, *pyc, concurrency::serial) + .input_family( + product_query{product_specification::create(cname + "_" + input_labels[0] + "py"), LAYER}, + product_query{product_specification::create(cname + "_" + input_labels[1] + "py"), LAYER}, + product_query{product_specification::create(cname + "_" + input_labels[2] + "py"), LAYER}) + .output_products(py_out); + } else { + PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); + return nullptr; + } + + // insert output converter node into the graph (TODO: same as above; these + // are explicit b/c of the templates only) + if (output_type == "bool") + INSERT_OUTPUT_CONVERTER(bool, cname, output); + else if (output_type == "int") + INSERT_OUTPUT_CONVERTER(int, cname, output); + else if (output_type == "unsigned int") + INSERT_OUTPUT_CONVERTER(uint, cname, output); + else if (output_type == "long") + INSERT_OUTPUT_CONVERTER(long, cname, output); + else if (output_type == "unsigned long") + INSERT_OUTPUT_CONVERTER(ulong, cname, output); + else if (output_type == "float") + INSERT_OUTPUT_CONVERTER(float, cname, output); + else if (output_type == "double") + INSERT_OUTPUT_CONVERTER(double, cname, output); +#ifdef PHLEX_HAVE_NUMPY + else if (output_type.compare(0, 13, "numpy.ndarray") == 0) { + // TODO: just like for input types, these are hard-coded, but should be handled by + // an IDL instead. + auto pos = output_type.rfind("numpy.dtype"); + if (pos == std::string::npos) { + PyErr_Format( + PyExc_TypeError, "could not determine dtype of input type \"%s\"", output_type.c_str()); + return nullptr; + } + + pos += 18; + + auto py_in = "py" + output + "_" + cname; + if (output_type.compare(pos, std::string::npos, "int32]]") == 0) { + mod->ph_module->transform("pyvint_" + output + "_" + cname, py_to_vint, concurrency::serial) + .input_family(product_query{product_specification::create(py_in), LAYER}) + .output_products(output); + } else if (output_type.compare(pos, std::string::npos, "uint32]]") == 0) { + mod->ph_module->transform("pyvuint_" + output + "_" + cname, py_to_vuint, concurrency::serial) + .input_family(product_query{product_specification::create(py_in), LAYER}) + .output_products(output); + } else if (output_type.compare(pos, std::string::npos, "int64]]") == 0) { // need not be true + mod->ph_module->transform("pyvlong_" + output + "_" + cname, py_to_vlong, concurrency::serial) + .input_family(product_query{product_specification::create(py_in), LAYER}) + .output_products(output); + } else if (output_type.compare(pos, std::string::npos, "uint64]]") == 0) { // id. + mod->ph_module + ->transform("pyvulong_" + output + "_" + cname, py_to_vulong, concurrency::serial) + .input_family(product_query{product_specification::create(py_in), LAYER}) + .output_products(output); + } else if (output_type.compare(pos, std::string::npos, "float32]]") == 0) { + mod->ph_module + ->transform("pyvfloat_" + output + "_" + cname, py_to_vfloat, concurrency::serial) + .input_family(product_query{product_specification::create(py_in), LAYER}) + .output_products(output); + } else if (output_type.compare(pos, std::string::npos, "double64]]") == 0) { + mod->ph_module + ->transform("pyvdouble_" + output + "_" + cname, py_to_vdouble, concurrency::serial) + .input_family(product_query{product_specification::create(py_in), LAYER}) + .output_products(output); + } else { + PyErr_Format(PyExc_TypeError, "unsupported array output type \"%s\"", output_type.c_str()); + return nullptr; + } + } +#endif + else { + PyErr_Format(PyExc_TypeError, "unsupported output type \"%s\"", output_type.c_str()); + return nullptr; + } + + Py_RETURN_NONE; +} + +static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds) +{ + // Register a python observer by adding the necessary intermediate converter + // nodes going from C++ to PyObject* and back. + + std::string cname; + std::vector input_labels, input_types, output_labels, output_types; + PyObject* callable = + parse_args(args, kwds, cname, input_labels, input_types, output_labels, output_types); + if (!callable) + return nullptr; // error already set + + if (!output_types.empty()) { + PyErr_Format(PyExc_TypeError, "an observer should not have an output type"); + return nullptr; + } + + if (!insert_input_converters(mod, cname, input_labels, input_types)) + return nullptr; // error already set + + // register Python observer + if (input_labels.size() == 1) { + auto* pyc = new py_callback_1v{callable}; // id. + mod->ph_module->observe(cname, *pyc, concurrency::serial) + .input_family( + product_query{product_specification::create(cname + "_" + input_labels[0] + "py"), LAYER}); + } else if (input_labels.size() == 2) { + auto* pyc = new py_callback_2v{callable}; + mod->ph_module->observe(cname, *pyc, concurrency::serial) + .input_family( + product_query{product_specification::create(cname + "_" + input_labels[0] + "py"), LAYER}, + product_query{product_specification::create(cname + "_" + input_labels[1] + "py"), LAYER}); + } else if (input_labels.size() == 3) { + auto* pyc = new py_callback_3v{callable}; + mod->ph_module->observe(cname, *pyc, concurrency::serial) + .input_family( + product_query{product_specification::create(cname + "_" + input_labels[0] + "py"), LAYER}, + product_query{product_specification::create(cname + "_" + input_labels[1] + "py"), LAYER}, + product_query{product_specification::create(cname + "_" + input_labels[2] + "py"), LAYER}); + } else { + PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); + return nullptr; + } + + Py_RETURN_NONE; +} + +static PyMethodDef md_methods[] = {{(char*)"transform", + (PyCFunction)md_transform, + METH_VARARGS | METH_KEYWORDS, + (char*)"register a Python transform"}, + {(char*)"observe", + (PyCFunction)md_observe, + METH_VARARGS | METH_KEYWORDS, + (char*)"register a Python observer"}, + {(char*)nullptr, nullptr, 0, nullptr}}; + +// clang-format off +PyTypeObject phlex::experimental::PhlexModule_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + (char*)"pyphlex.module", // tp_name + sizeof(py_phlex_module), // tp_basicsize + 0, // tp_itemsize + 0, // tp_dealloc + 0, // tp_vectorcall_offset / tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_as_async / tp_compare + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + (char*)"phlex module wrapper", // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter + 0, // tp_iternext + md_methods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + 0, // tp_alloc + 0, // tp_new + 0, // tp_free + 0, // tp_is_gc + 0, // tp_bases + 0, // tp_mro + 0, // tp_cache + 0, // tp_subclasses + 0 // tp_weaklist +#if PY_VERSION_HEX >= 0x02030000 + , 0 // tp_del +#endif +#if PY_VERSION_HEX >= 0x02060000 + , 0 // tp_version_tag +#endif +#if PY_VERSION_HEX >= 0x03040000 + , 0 // tp_finalize +#endif +#if PY_VERSION_HEX >= 0x03080000 + , 0 // tp_vectorcall +#endif +#if PY_VERSION_HEX >= 0x030c0000 + , 0 // tp_watched +#endif +#if PY_VERSION_HEX >= 0x030d0000 + , 0 // tp_versions_used +#endif +}; +// clang-format on diff --git a/plugins/python/src/pymodule.cpp b/plugins/python/src/pymodule.cpp new file mode 100644 index 000000000..c27f9e6ad --- /dev/null +++ b/plugins/python/src/pymodule.cpp @@ -0,0 +1,139 @@ +#include +#include +#include +#include + +#include "phlex/core/framework_graph.hpp" + +#include "wrap.hpp" + +#ifdef PHLEX_HAVE_NUMPY +#define PY_ARRAY_UNIQUE_SYMBOL phlex_ARRAY_API +#include +#endif + +using namespace phlex::experimental; + +static bool initialize(); + +PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m, config) +{ + initialize(); + + PyGILRAII g; + + std::string modname = config.get("pyplugin"); + PyObject* mod = PyImport_ImportModule(modname.c_str()); + if (mod) { + PyObject* reg = PyObject_GetAttrString(mod, "PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS"); + if (reg) { + PyObject* pym = wrap_module(&m); + PyObject* pyconfig = wrap_configuration(&config); + if (pym && pyconfig) { + PyObject* res = PyObject_CallFunctionObjArgs(reg, pym, pyconfig, nullptr); + Py_XDECREF(res); + } + Py_XDECREF(pyconfig); + Py_XDECREF(pym); + Py_DECREF(reg); + } + Py_DECREF(mod); + } + + if (PyErr_Occurred()) { + std::string error_msg; + if (!msg_from_py_error(error_msg)) + error_msg = "Unknown python error"; + throw std::runtime_error(error_msg.c_str()); + } +} + +#ifdef PHLEX_HAVE_NUMPY +static void import_numpy(bool control_interpreter) +{ + static std::atomic numpy_imported{false}; + if (!numpy_imported.exchange(true)) { + if (_import_array() < 0) { + PyErr_Print(); + if (control_interpreter) + Py_Finalize(); + throw std::runtime_error("build with numpy support, but numpy not importable"); + } + } +} +#endif + +static bool initialize() +{ + if (Py_IsInitialized()) { +#ifdef PHLEX_HAVE_NUMPY + import_numpy(false); +#endif + return true; + } + + // TODO: the Python library is already loaded (b/c it's linked with + // this module), but its symbols need to be exposed globally to Python + // extension modules such as ctypes, yet this module is loaded with + // private visibility only. The workaround here locates the library and + // reloads (the handle is leaked b/c there's no knowing when it needs + // to be offloaded). + void* addr = dlsym(RTLD_DEFAULT, "Py_IsInitialized"); + if (addr) { + Dl_info info; + if (dladdr(addr, &info) == 0 || info.dli_fname == 0 || info.dli_fname[0] == 0) { + throw std::runtime_error("unable to determine linked libpython"); + } + dlopen(info.dli_fname, RTLD_GLOBAL | RTLD_NOW); + } + +#if PY_VERSION_HEX < 0x03020000 + PyEval_InitThreads(); +#endif +#if PY_VERSION_HEX < 0x03080000 + Py_Initialize(); +#else + PyConfig config; + PyConfig_InitPythonConfig(&config); + PyConfig_SetString(&config, &config.program_name, L"phlex"); + Py_InitializeFromConfig(&config); +#endif +#if PY_VERSION_HEX >= 0x03020000 +#if PY_VERSION_HEX < 0x03090000 + PyEval_InitThreads(); +#endif +#endif + // try again to see if the interpreter is now initialized + if (!Py_IsInitialized()) + throw std::runtime_error("Python can not be initialized"); + +#if PY_VERSION_HEX < 0x03080000 + // set the command line arguments on python's sys.argv + wchar_t* argv[] = {const_cast(L"phlex")}; + PySys_SetArgv(sizeof(argv) / sizeof(argv[0]), argv); +#endif + + // add custom types + if (PyType_Ready(&PhlexConfig_Type) < 0) + return false; + if (PyType_Ready(&PhlexModule_Type) < 0) + return false; + if (PyType_Ready(&PhlexLifeline_Type) < 0) + return false; + + // load numpy (see also above, if already initialized) +#ifdef PHLEX_HAVE_NUMPY + import_numpy(true); +#endif + + // TODO: the GIL should first be released on the main thread and this seems + // to be the only place to do it. However, there is no equivalent place to + // re-acquire it after the TBB runs are done, so normal shutdown of the + // Python interpreter will not happen atm. + static std::atomic gil_released{false}; + if (!gil_released.exchange(true)) { + (void)PyEval_SaveThread(); // state not saved, as no place to restore + } + + return true; +} diff --git a/plugins/python/src/wrap.hpp b/plugins/python/src/wrap.hpp new file mode 100644 index 000000000..96bad83e6 --- /dev/null +++ b/plugins/python/src/wrap.hpp @@ -0,0 +1,71 @@ +#ifndef phlex_python_wrap_hpp +#define phlex_python_wrap_hpp + +// ======================================================================================= +// +// Registration type wrappers. +// +// Design rationale +// ================ +// +// The C++ and Python registration mechanisms are tailored to each language (e.g. the +// discovery of algorithm signatures is rather different). Furthermore, the Python side +// has its own registration pythonized module. Thus, it is not necessary to expose the +// full C++ registration types on the Python side and for the sake of efficiency, these +// wrappers provide a minimalistic interface. +// +// ======================================================================================= + +#include "Python.h" + +#include +#include + +#include "phlex/configuration.hpp" +#include "phlex/module.hpp" + +namespace phlex::experimental { + + // Create dict-like access to the configuration from Python. + // Returns a new reference. + PyObject* wrap_configuration(configuration const* config); + + // Python wrapper for Phlex configuration + extern PyTypeObject PhlexConfig_Type; + struct py_config_map; + + // Phlex' Module wrapper to register algorithms + typedef graph_proxy phlex_module_t; + // Returns a new reference. + PyObject* wrap_module(phlex_module_t* mod); + + // Python wrapper for Phlex modules + extern PyTypeObject PhlexModule_Type; + struct py_phlex_module; + + // Python wrapper for Phlex handles + extern PyTypeObject PhlexLifeline_Type; + // clang-format off + struct py_lifeline { + PyObject_HEAD + PyObject* m_view; + std::shared_ptr m_source; + }; + using py_lifeline_t = py_lifeline; + // clang-format on + + // Error reporting helper. + bool msg_from_py_error(std::string& msg, bool check_error = false); + + // RAII helper for GIL handling + class PyGILRAII { + PyGILState_STATE m_GILState; + + public: + PyGILRAII() : m_GILState(PyGILState_Ensure()) {} + ~PyGILRAII() { PyGILState_Release(m_GILState); } + }; + +} // namespace phlex::experimental + +#endif // phlex_python_pymodule_hpp diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt index 9a79516e0..df0c62bed 100644 --- a/test/python/CMakeLists.txt +++ b/test/python/CMakeLists.txt @@ -57,6 +57,7 @@ except ImportError: check_python_module_version("cppyy" "3.6.0" HAS_CPPYY) check_python_module_version("numba" "0.61.0" HAS_NUMBA) + check_python_module_version("numpy" "2.0.0" HAS_NUMPY) if(HAS_CPPYY) @@ -71,50 +72,77 @@ except ImportError: set(PYTHON_TEST_PHLEX_INSTALL ${CMAKE_SOURCE_DIR}) endif() - # C++ helper to provide a driver - add_library(py_cppdriver MODULE driver.cpp) - target_link_libraries(py_cppdriver PRIVATE Boost::json phlex::core) - - # Phlex module to run Python algorithms - add_library(pymodule MODULE pymodule.cpp) - include_directories(pymodule, ${Python_INCLUDE_DIRS}) - target_link_libraries( - pymodule - PRIVATE phlex::module ${Python_LIBRARIES} - PUBLIC Python::Python - ) - - # tests of the python support modules + # tests of the python support modules (relies on cppyy) add_test( NAME py:phlex COMMAND ${PYTHON_TEST_EXECUTABLE} -m pytest test_phlex.py WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) - # phlex-based tests - add_test(NAME py:add COMMAND phlex -c - ${CMAKE_CURRENT_SOURCE_DIR}/pyadd.jsonnet - ) - - set(TEST_PYTHONPATH ${CMAKE_CURRENT_SOURCE_DIR}) - if(DEFINED ENV{VIRTUAL_ENV}) - set(TEST_PYTHONPATH - ${TEST_PYTHONPATH}:${Python_SITELIB}:${Python_SITEARCH} - ) - endif() - set(TEST_PYTHONPATH ${TEST_PYTHONPATH}:$ENV{PYTHONPATH}) - set_property( TEST py:phlex PROPERTY ENVIRONMENT "PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}" ) - set_property( - TEST py:add - PROPERTY - ENVIRONMENT - "PHLEX_PLUGIN_PATH=${CMAKE_CURRENT_BINARY_DIR};PYTHONPATH=${TEST_PYTHONPATH};PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}" - ) + endif() + + set(ACTIVE_PY_CPHLEX_TESTS "") + + # C++ helper to provide a driver + add_library(cppdriver4py MODULE driver.cpp) + target_link_libraries(cppdriver4py PRIVATE phlex::module) + + # numpy support if installed + if(HAS_NUMPY) + + # phlex-based tests that require numpy support + add_test(NAME py:vec COMMAND phlex -c + ${CMAKE_CURRENT_SOURCE_DIR}/pyvec.jsonnet + ) + list(APPEND ACTIVE_PY_CPHLEX_TESTS py:vec) + endif() + + # phlex-based tests (no cppyy dependency) + add_test(NAME py:add COMMAND phlex -c + ${CMAKE_CURRENT_SOURCE_DIR}/pyadd.jsonnet + ) + list(APPEND ACTIVE_PY_CPHLEX_TESTS py:add) + + add_test(NAME py:config COMMAND phlex -c + ${CMAKE_CURRENT_SOURCE_DIR}/pyconfig.jsonnet + ) + list(APPEND ACTIVE_PY_CPHLEX_TESTS py:config) + + add_test(NAME py:reduce COMMAND phlex -c + ${CMAKE_CURRENT_SOURCE_DIR}/pyreduce.jsonnet + ) + list(APPEND ACTIVE_PY_CPHLEX_TESTS py:reduce) + + # "failing" tests for checking error paths + add_test( + NAME py:failure + COMMAND + ${CMAKE_CURRENT_SOURCE_DIR}/failing_test_wrap.sh + ${PROJECT_BINARY_DIR}/bin/phlex -c + ${CMAKE_CURRENT_SOURCE_DIR}/pyfailure.jsonnet + ) + set_tests_properties( + py:failure PROPERTIES PASS_REGULAR_EXPRESSION + "property \"input\" does not exist" + ) + list(APPEND ACTIVE_PY_CPHLEX_TESTS py:failure) + + set(TEST_PYTHONPATH ${CMAKE_CURRENT_SOURCE_DIR}) + if(DEFINED ENV{VIRTUAL_ENV}) + set(TEST_PYTHONPATH ${TEST_PYTHONPATH}:${Python_SITELIB}:${Python_SITEARCH}) + endif() + set(TEST_PYTHONPATH ${TEST_PYTHONPATH}:$ENV{PYTHONPATH}) + + set_tests_properties( + ${ACTIVE_PY_CPHLEX_TESTS} + PROPERTIES + ENVIRONMENT + "SPDLOG_LEVEL=debug;PHLEX_PLUGIN_PATH=${PROJECT_BINARY_DIR};PYTHONPATH=${TEST_PYTHONPATH};PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}" + ) - endif() # cppyy available endif() # Python available diff --git a/test/python/adder.py b/test/python/adder.py index e5bc7717e..1aab19565 100644 --- a/test/python/adder.py +++ b/test/python/adder.py @@ -1,9 +1,45 @@ -"""This module defines a simple C++ function using cppyy.""" -import cppyy +"""A most basic algorithm. -cppyy.cppdef("""\ -namespace test { - int add(int i, int j) { return i + j; } -}""") +This test code implements the smallest possible run that does something +real. It serves as a "Hello, World" equivalent for running Python code. +""" + +def add(i: int, j: int) -> int: + """Add the inputs together and return the sum total. + + Use the standard `+` operator to add the two inputs together + to arrive at their total. + + Args: + i (int): First input. + j (int): Second input. + + Returns: + int: Sum of the two inputs. + + Examples: + >>> add(1, 2) + 3 + """ + return i + j + + +def PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m, config): + """Register the `add` algorithm as a transformation. + + Use the standard Phlex `transform` registration to insert a node + in the execution graph that receives two inputs and produces their + sum as an ouput. The labels of inputs and outputs are taken from + the configuration. + + Args: + m (internal): Phlex registrar representation. + config (internal): Phlex configuration representation. + + Returns: + None + """ + m.transform(add, + input_family = config["input"], + output_products = config["output"]) -add = cppyy.gbl.test.add diff --git a/test/python/all_config.py b/test/python/all_config.py new file mode 100644 index 000000000..59cbfa613 --- /dev/null +++ b/test/python/all_config.py @@ -0,0 +1,75 @@ +"""Python-representation of configuration. + +This test code comes with a configurtion file that provides exemplars of +all supported types. It then verifies whether this is properly translated +to Python. The actual run is a noop. +""" + +class ConfigConsumer: + """A callable class that "needs" every configuration type. + + Attributes: + __name__ (str): Identifier for Phlex. + """ + + __name__ = 'config_consumer' + + def __init__(self, config): + """Create a config consumer object. + + Args: + config (internal): Phlex configuration + + Raises: + AssertionError: if the provided configuration values do not + match the expected ones. + + Returns: + None + """ + # builtin types + assert config['a_bool'] == False # noqa: E712 # we really want to check False + assert config['an_int'] == -37 + assert config['a_uint'] == 18446744073709551616 + assert config['a_float'] == 3.1415 + assert config['a_string'] == 'foo' + + # collection types + assert config['some_bools'] == (False, True) + assert config['some_ints'] == (-1, 42, -55) + assert config['some_uints'] == (18446744073709551616, 29, 137) + assert config['some_floats'] == (3.1415, 2.71828) + assert config['some_strings'] == ('aap', 'noot', 'mies') + + def __call__(self, i: int, j: int) -> None: + """Dummy routine to do something. + + Consume values `i` and `j` to execute in the Phlex graph. + + Args: + i (int): The first input value. + j (int): The second input value. + + Returns: + None + """ + pass + + +def PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m, config): + """Register an instance of `ConfigConsumer` as an observer. + + Use the standard Phlex `observe` registration to insert a node in + the execution graph that receives two values `i` and `j` for no + particular reason other than to run inside the execution graph. + + Args: + m (internal): Phlex registrar representation. + config (internal): Phlex configuration representation. + + Returns: + None + """ + config_consumer = ConfigConsumer(config) + m.observe(config_consumer, input_family = config["input"]) + diff --git a/test/python/driver.cpp b/test/python/driver.cpp index 8111b3d8e..ac3a20ba8 100644 --- a/test/python/driver.cpp +++ b/test/python/driver.cpp @@ -1,32 +1,11 @@ -#include "phlex/model/product_store.hpp" -#include "phlex/source.hpp" +#include "phlex/module.hpp" -#include +using namespace phlex::experimental; -namespace { - class number_generator { - public: - number_generator(phlex::experimental::configuration const& config) : - n_{config.get("max_numbers")} - { - } - - void next(phlex::experimental::framework_driver& driver) const - { - auto job_store = phlex::experimental::product_store::base(); - driver.yield(job_store); - - for (int i : std::views::iota(1, n_ + 1)) { - auto store = job_store->make_child(i, "event"); - store->add_product("i", i); - store->add_product("j", -i); - driver.yield(store); - } - } - - private: - int n_; - }; +PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m) +{ + m.provide("provide_i", [](data_cell_index const& id) -> int { return id.number(); }) + .output_product("i"_in("job")); + m.provide("provide_j", [](data_cell_index const& id) -> int { return -id.number() + 1; }) + .output_product("j"_in("job")); } - -PHLEX_EXPERIMENTAL_REGISTER_SOURCE(number_generator) diff --git a/test/python/failing_test_wrap.sh b/test/python/failing_test_wrap.sh new file mode 100755 index 000000000..ee8081316 --- /dev/null +++ b/test/python/failing_test_wrap.sh @@ -0,0 +1,7 @@ +#!/bin/bash +"$@" +exit_code=$? +if [ $exit_code -ne 0 ]; then + exit 1 +fi +exit 0 diff --git a/test/python/pyphlex.py b/test/python/phlexpy.py similarity index 100% rename from test/python/pyphlex.py rename to test/python/phlexpy.py diff --git a/test/python/pyadd.jsonnet b/test/python/pyadd.jsonnet index 4ddc46acd..fe9c3293b 100644 --- a/test/python/pyadd.jsonnet +++ b/test/python/pyadd.jsonnet @@ -1,15 +1,25 @@ { source: { - plugin: 'py_cppdriver', - max_numbers: 10, + plugin: 'generate_layers', + layers: { + event: { parent: 'job', total: 10, starting_number: 1 } + } }, modules: { + cppdriver: { + plugin: 'cppdriver4py', + }, pyadd: { - pymodule: 'adder', - pyalg: 'add', + plugin: 'pymodule', + pyplugin: 'adder', input: ['i', 'j'], output: ['sum'], + }, + pyverify: { plugin: 'pymodule', + pyplugin: 'verify', + input: ['sum'], + sum_total: 1, }, }, } diff --git a/test/python/pyconfig.jsonnet b/test/python/pyconfig.jsonnet new file mode 100644 index 000000000..ee54c947a --- /dev/null +++ b/test/python/pyconfig.jsonnet @@ -0,0 +1,28 @@ +{ + source: { + plugin: 'generate_layers', + layers: { + event: { parent: "job", total: 10, starting_number: 1 } + } + }, + modules: { + cppdriver: { + plugin: 'cppdriver4py', + }, + pyconfig: { + plugin: 'pymodule', + pyplugin: 'all_config', + input: ['i', 'j'], + a_bool: false, + an_int: -37, + a_uint: 18446744073709551616, + a_float: 3.1415, + a_string: 'foo', + some_bools: [false, true], + some_ints: [-1, 42, -55], + some_uints: [18446744073709551616, 29, 137], + some_floats: [3.1415, 2.71828], + some_strings: ['aap', 'noot', 'mies'], + }, + }, +} diff --git a/test/python/pyfailure.jsonnet b/test/python/pyfailure.jsonnet new file mode 100644 index 000000000..f460d4fc5 --- /dev/null +++ b/test/python/pyfailure.jsonnet @@ -0,0 +1,19 @@ +{ + source: { + plugin: 'generate_layers', + layers: { + event: { parent: 'job', total: 10, starting_number: 1 } + } + }, + modules: { + cppdriver: { + plugin: 'cppdriver4py', + }, + pyadd: { + plugin: 'pymodule', + pyplugin: 'adder', + #input: ['i', 'j'], # commented out to cause a failure + output: ['sum'], + }, + }, +} diff --git a/test/python/pymodule.cpp b/test/python/pymodule.cpp deleted file mode 100644 index a223dda59..000000000 --- a/test/python/pymodule.cpp +++ /dev/null @@ -1,110 +0,0 @@ -#include "phlex/module.hpp" - -#include -#include - -using namespace phlex::experimental; - -static bool Initialize() -{ - if (Py_IsInitialized()) - return true; - - // TODO: the Python library is already loaded (b/c it's linked with - // this module), but its symbols need to be exposed globally to Python - // extension modules such as ctypes, yet this module is loaded with - // private visibility only. The workaround here locates the library and - // reloads (the handle is leaked b/c there's no knowing when it needs - // to be offloaded). - void* addr = dlsym(RTLD_DEFAULT, "Py_IsInitialized"); - if (addr) { - Dl_info info; - if (dladdr(addr, &info) == 0 || info.dli_fname == 0 || info.dli_fname[0] == 0) { - throw std::runtime_error("unable to determine linked libpython"); - } - dlopen(info.dli_fname, RTLD_GLOBAL | RTLD_NOW); - } else { - throw std::runtime_error("can not locate linked libpython"); - } - -#if PY_VERSION_HEX < 0x03020000 - PyEval_InitThreads(); -#endif -#if PY_VERSION_HEX < 0x03080000 - Py_Initialize(); -#else - PyConfig config; - PyConfig_InitPythonConfig(&config); - PyConfig_SetString(&config, &config.program_name, L"phlex"); - Py_InitializeFromConfig(&config); -#endif -#if PY_VERSION_HEX >= 0x03020000 -#if PY_VERSION_HEX < 0x03090000 - PyEval_InitThreads(); -#endif -#endif - - // try again to see if the interpreter is now initialized - if (!Py_IsInitialized()) - throw std::runtime_error("Python can not be initialized"); - -#if PY_VERSION_HEX < 0x03080000 - // set the command line arguments on python's sys.argv - wchar_t* argv[] = {const_cast(L"phlex")}; - PySys_SetArgv(sizeof(argv) / sizeof(argv[0]), argv); -#endif - - return true; -} - -PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m, config) -{ - Initialize(); - - PyObject* registry = PyImport_ImportModule("registry"); - if (registry) { - PyObject* reg = PyObject_GetAttrString(registry, "register"); - if (reg) { - PyObject* pym = PyCapsule_New(&m, nullptr, nullptr); - PyObject* pyconfig = PyCapsule_New((void*)&config, nullptr, nullptr); - PyObject* res = PyObject_CallFunctionObjArgs(reg, pym, pyconfig, nullptr); - Py_XDECREF(res); - Py_DECREF(pyconfig); - Py_DECREF(pym); - Py_DECREF(reg); - } - Py_DECREF(registry); - } - - if (PyErr_Occurred()) { - std::string msg; -#if PY_VERSION_HEX < 0x30c000000 - PyObject *type = nullptr, *value = nullptr, *traceback = nullptr; - PyErr_Fetch(&type, &value, &traceback); - if (value) { - PyObject* pymsg = PyObject_Str(value); - msg = PyUnicode_AsUTF8(pymsg); - Py_DECREF(pymsg); - } else { - msg = "unknown Python error occurred"; - } - Py_XDECREF(traceback); - Py_XDECREF(value); - Py_XDECREF(type); -#else - PyObject* exc = PyErr_GetRaisedException(); - if (exc) { - PyObject* pymsg = PyObject_Str(exc); - msg = PyUnicode_AsString(pymsg); - Py_DECREF(pymsg); - Py_DECREF(exc); - } -#endif - throw std::runtime_error(msg.c_str()); - } -} - -// dict-like construct to access configuration from Python - -// TODO: the current implementation of the configuration hides the iteration -// over the underlying object, thus preventing nice printout etc. diff --git a/test/python/pyreduce.jsonnet b/test/python/pyreduce.jsonnet new file mode 100644 index 000000000..df79bff75 --- /dev/null +++ b/test/python/pyreduce.jsonnet @@ -0,0 +1,24 @@ +{ + source: { + plugin: 'generate_layers', + layers: { + event: { parent: "job", total: 10, starting_number: 1 } + } + }, + modules: { + cppdriver: { + plugin: 'cppdriver4py', + }, + pyreduce: { + plugin: 'pymodule', + pyplugin: 'reducer', + input: ['i', 'j'], + }, + pyverify: { + plugin: 'pymodule', + pyplugin: 'verify', + input: ['sum'], + sum_total: 4, + }, + }, +} diff --git a/test/python/pyvec.jsonnet b/test/python/pyvec.jsonnet new file mode 100644 index 000000000..cf9ba3487 --- /dev/null +++ b/test/python/pyvec.jsonnet @@ -0,0 +1,25 @@ +{ + source: { + plugin: 'generate_layers', + layers: { + event: { parent: "job", total: 10, starting_number: 1 } + } + }, + modules: { + cppdriver: { + plugin: 'cppdriver4py', + }, + pysum: { + plugin: 'pymodule', + pyplugin: 'sumit', + input: ['i', 'j'], + output: ['sum'], + }, + pyverify: { + plugin: 'pymodule', + pyplugin: 'verify', + input: ['sum'], + sum_total: 1, + }, + }, +} diff --git a/test/python/reducer.py b/test/python/reducer.py new file mode 100644 index 000000000..90501c281 --- /dev/null +++ b/test/python/reducer.py @@ -0,0 +1,72 @@ +"""Explicit reduction algorithm. + +This test code implements a reduction by explicitly coding it up in the +Phlex execution graph. The goal is to try to creating node naming issues +to ensure these either don't happen or are properly resolved by Phlex: + + - Re-uses the same Python algorithm in different nodes, this means that + simply taking the name from reflection can not be relied upon. + - Several Python algorithms receive the same C++ inputs, which causes + either converter node naming problems (if converters are reused) or + lifetime issues (if they are not). +""" + +def add(i: int, j: int) -> int: + """Add the inputs together and return the sum total. + + Use the standard `+` operator to add the two inputs together + to arrive at their total. + + Args: + i (int): First input. + j (int): Second input. + + Returns: + int: Sum of the two inputs. + + Examples: + >>> add(1, 2) + 3 + """ + return i + j + + +def PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m, config): + """Register a series of `add` algorithm as transformations. + + Use the standard Phlex `transform` registration to insert nodes + in the execution graph that receive two inputs and produces their + sum as an ouput. The idea is a workflow from multiple independent + sources and make sure the GIL is properly handed off such that no + live-locks occur. + + Args: + m (internal): Phlex registrar representation. + config (internal): Phlex configuration representation. + + Returns: + None + """ + # first recieve the same input x4 but return "different" output + for i in range(4): + m.transform(add, + name = "reduce%d" % i, + input_family = config["input"], + output_products = ["sum%d" % i]) + + # now reduce them pair-wise + m.transform(add, + name = "reduce01", + input_family = ["sum0", "sum1"], + output_products = ["sum01"]) + m.transform(add, + name = "reduce23", + input_family = ["sum2", "sum3"], + output_products = ["sum23"]) + + # once more (and the configuration will add a verifier) + m.transform(add, + name = "reduce", + input_family = ["sum01", "sum23"], + output_products = ["sum"]) + diff --git a/test/python/registry.py b/test/python/registry.py deleted file mode 100644 index 6352db06d..000000000 --- a/test/python/registry.py +++ /dev/null @@ -1,33 +0,0 @@ -"""This module provides a function to register Python algorithms with the Phlex framework.""" -import types - -import cppyy -import pyphlex # noqa: F401 - -__all__ = ["pyphlex"] -cpp = cppyy.gbl -phlex = cpp.phlex.experimental - -cppyy.include("Python.h") - -_registered_modules: dict[str, types.ModuleType] = {} - - -def register(m, config): - config = cppyy.bind_object(config, "phlex::experimental::configuration") - pymod_name = str(config.get["std::string"]("pymodule")) - pyalg_name = str(config.get["std::string"]("pyalg")) - - inputs = tuple(str(x) for x in config.get["std::vector"]("input")) - outputs = tuple(str(x) for x in config.get["std::vector"]("output")) - - try: - pymod = _registered_modules[pymod_name] - except KeyError: - pymod = __import__(pymod_name) - _registered_modules[pymod_name] = pymod - - pyalg = getattr(pymod, pyalg_name) - - graph = cppyy.bind_object(m, "phlex::experimental::graph_proxy") - graph.with_(pyalg_name, pyalg, phlex.concurrency.serial).transform(*inputs).to(*outputs) diff --git a/test/python/sumit.py b/test/python/sumit.py new file mode 100644 index 000000000..1ea28fafc --- /dev/null +++ b/test/python/sumit.py @@ -0,0 +1,70 @@ +"""A most basic algorithm using a type that is not a C++ builtin. + +This test code implements the smallest possible run that does something +real with a non-builtin type: it exercises numpy support by using it as +output and input of algorithms. +""" + +import numpy as np +import numpy.typing as npt + + +def collectify(i: int, j: int) -> npt.NDArray[np.int32]: + """Combine the inputs into an array. + + Create a numpy array of 2 elements from the separate inputs. + + Args: + i (int): First input. + j (int): Second input. + + Returns: + ndarray: Array represntation of the two inputs. + + Examples: + >>> collectify(1, 2) + array([1, 2], dtype=int32) + """ + return np.array([i, j], dtype=np.int32) + +def sum_array(coll: npt.NDArray[np.int32]) -> int: + """Add the elements of the input collection and return the sum total. + + Use the builtin `sum` function to add the elements from the input + collection to arrive at their total. + + Args: + coll (ndarray): numpy array of input values. + + Returns: + int: Sum of the elements of the input collection. + + Examples: + >>> sum_array(np.array[1, 2])) + 3 + """ + return sum(coll) + + +def PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m, config): + """Register algorithms exercising numpy arrays. + + Use the standard Phlex `transform` registration to insert a node + in the execution graph that creates and one that receives a numpy + array. The final result is the sum of the elements as an ouput. The + labels of inputs and outputs are taken from the configuration. + + Args: + m (internal): Phlex registrar representation. + config (internal): Phlex configuration representation. + + Returns: + None + """ + m.transform(collectify, + input_family = config["input"], + output_products = ["my_pyarray"]) + m.transform(sum_array, + input_family = ["my_pyarray"], + output_products = config["output"]) + diff --git a/test/python/test_phlex.py b/test/python/test_phlex.py index 92f78697e..ba1be4f74 100644 --- a/test/python/test_phlex.py +++ b/test/python/test_phlex.py @@ -7,9 +7,9 @@ class TestPYPHLEX: @classmethod def setup_class(cls): """Set up the test class.""" - import pyphlex # noqa: F401 + import phlexpy # noqa: F401 - __all__ = ["pyphlex"] # noqa: F841 # For CodeQL + __all__ = ["phlexpy"] # noqa: F841 # For CodeQL def test01_phlex_existence(self): """Test existence of the phlex namespace.""" diff --git a/test/python/verify.py b/test/python/verify.py new file mode 100644 index 000000000..4aed8c869 --- /dev/null +++ b/test/python/verify.py @@ -0,0 +1,72 @@ +"""An observer to check for output in tests. + +Test algorithms produce outputs. To ensure that a test is run correctly, +this observer verifies its result against the expected value. +""" + +class Verifier: + """A callable class that can assert an expected value. + + Attributes: + __name__ (str): Identifier for Phlex. + + Examples: + >>> v = Verifier(42) + >>> v.(42) + >>> v.(21) + Traceback (most recent call last): + File "", line 1, in + File "verify.py", line 22, in __call__ + assert value == self._sum_total + ^^^^^^^^^^^^^^^^^^^^^^^^ + AssertionError + """ + + __name__ = 'verifier' + + def __init__(self, sum_total: int): + """Create a verifier object. + + Args: + sum_total (int): The expected value. + + Returns: + None + """ + self._sum_total = sum_total + + def __call__(self, value: int) -> None: + """Verify a the `value`. + + Check that `value` matches the pre-registered value. + + Args: + value (int): The value to verify. + + Raises: + AssertionError: if the provided value does not matches the + pre-registed value. + + Returns: + None + """ + assert value == self._sum_total + + +def PHLEX_EXPERIMENTAL_REGISTER_ALGORITHMS(m, config): + """Register an instance of `Verifier` as an observer. + + Use the standard Phlex `observe` registration to insert a node in + the execution graph that receives a summed total to check against an + expected value. The expected total is taken from the configuration. + + Args: + m (internal): Phlex registrar representation. + config (internal): Phlex configuration representation. + + Returns: + None + """ + assert_sum = Verifier(config["sum_total"]) + m.observe(assert_sum, input_family = config["input"]) +