diff --git a/.all-contributorsrc b/.all-contributorsrc index 2001a788..0faa7c97 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -193,6 +193,36 @@ "design", "review" ] + }, + { + "login": "aragilar", + "name": "James Tocknell", + "avatar_url": "https://avatars.githubusercontent.com/u/1281144?v=4", + "profile": "http://web.science.mq.edu.au/directory/listing/person.htm?id=tjames", + "contributions": [ + "code", + "review" + ] + }, + { + "login": "frostming", + "name": "Frost Ming", + "avatar_url": "https://avatars.githubusercontent.com/u/16336606?v=4", + "profile": "https://frostming.com", + "contributions": [ + "code", + "review" + ] + }, + { + "login": "hugovk", + "name": "Hugo van Kemenade", + "avatar_url": "https://avatars.githubusercontent.com/u/1324225?v=4", + "profile": "https://github.com/hugovk", + "contributions": [ + "code", + "review" + ] } ], "contributorsPerLine": 7, diff --git a/.github/workflows/artifact_redirect.yml b/.github/workflows/artifact_redirect.yml new file mode 100644 index 00000000..9d61e600 --- /dev/null +++ b/.github/workflows/artifact_redirect.yml @@ -0,0 +1,18 @@ +name: Book Preview + +on: [status] + +jobs: + circleci_artifacts_redirector_job: + runs-on: ubuntu-latest + if: "${{ github.event.context == 'ci/circleci: build_book' }}" + name: Run CircleCI artifacts redirector + steps: + - name: GitHub Action step + id: step1 + uses: larsoner/circleci-artifacts-redirector-action@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + artifact-path: 0/html/index.html + circleci-jobs: build_book + job-title: Click to preview rendered book diff --git a/.gitignore b/.gitignore index ce63f9f7..49fb9bc9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ tmp/ .DS_Store .nox __pycache__ +*notes-from-review.md +*.idea* diff --git a/README.md b/README.md index f94bcf2e..47d1dddd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pyOpenSci Scientific Python Open Source Packaging Guide -[![All 
Contributors](https://img.shields.io/badge/all_contributors-18-orange.svg?style=flat-square)](#contributors-) +[![All Contributors](https://img.shields.io/badge/all_contributors-21-orange.svg?style=flat-square)](#contributors-) ![GitHub release (latest by date)](https://img.shields.io/github/v/release/pyopensci/python-package-guide?color=purple&display_name=tag&style=plastic) @@ -99,6 +99,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Pradyun Gedam
Pradyun Gedam

💻 🎨 👀 Ofek Lev
Ofek Lev

💻 🎨 👀 Chiara Marmo
Chiara Marmo

💻 🎨 👀 + James Tocknell
James Tocknell

💻 👀 + Frost Ming
Frost Ming

💻 👀 + Hugo van Kemenade
Hugo van Kemenade

πŸ’» πŸ‘€ diff --git a/conf.py b/conf.py index f730762c..54834590 100644 --- a/conf.py +++ b/conf.py @@ -17,12 +17,12 @@ # -- Project information ----------------------------------------------------- -project = 'python-package-guide' -copyright = '2023, pyOpenSci' -author = 'pyOpenSci Community' +project = "python-package-guide" +copyright = "2023, pyOpenSci" +author = "pyOpenSci Community" # The full version, including alpha/beta/rc tags -release = '0.1' +release = "0.1" # -- General configuration --------------------------------------------------- @@ -47,7 +47,7 @@ myst_heading_anchors = 3 # For generating sitemap -html_baseurl = 'https://www.pyopensci.org/software-peer-review/' +html_baseurl = "https://www.pyopensci.org/software-peer-review/" # Link to our repo for easy PR/ editing html_theme_options = { @@ -81,7 +81,7 @@ "header_links_before_dropdown": 4, "use_edit_page_button": True, "show_toc_level": 1, - #"navbar_align": "left", # [left, content, right] For testing that the navbar items align properly + # "navbar_align": "left", # [left, content, right] For testing that the navbar items align properly "github_url": "https://github.com/pyopensci/python-package-guide", "twitter_url": "https://twitter.com/pyopensci", "footer_items": ["copyright"], @@ -99,11 +99,11 @@ # Add analytics to furo theme gtagjs_ids = [ - 'UA-141260825-1', + "UA-141260825-1", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
@@ -114,20 +114,20 @@ ".DS_Store", ".github", ".nox", - "README.md" - ] + "README.md", +] # For sitemap -html_baseurl = 'https://www.pyopensci.org/package-review-guide/' +html_baseurl = "https://www.pyopensci.org/package-review-guide/" # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'pydata_sphinx_theme' +html_theme = "pydata_sphinx_theme" html_static_path = ["_static"] html_css_files = ["pyos.css"] +html_title = "pyOpenSci Python Packaging Guide" html_js_files = ["matomo.js"] -html_title = "pyOpenSci Package Guide" html_logo = "images/logo/logo.png" diff --git a/documentation/write-user-documentation/document-your-code-api-docstrings.md b/documentation/write-user-documentation/document-your-code-api-docstrings.md index 6b964357..3607f3e2 100644 --- a/documentation/write-user-documentation/document-your-code-api-docstrings.md +++ b/documentation/write-user-documentation/document-your-code-api-docstrings.md @@ -207,3 +207,302 @@ def add_me(aNum, aNum2): ``` + + +## Beyond docstrings: type hints + +We use docstrings to describe data types that we pass into functions as parameters or +into classes as attributes. *We do it with our users in mind.* + +**What with us – developers?** We can think of ourselves and the new contributors, +and start using *type hinting* to make our journey safer! + +There are solid reasons why to use type hints: + +- Development and debugging are faster, +- We clearly see data flow and its transformations, +- We can use tools like `mypy` or integrated tools of Python IDEs for static type checking and code debugging. + +We should consider type hinting if our package performs data processing, +functions require complex inputs, and we want to lower the entrance barrier for our contributors. +The icing on the cake is that the code in our package will be aligned with the best industry standards. 
+ +But there are reasons to *skip* type hinting: + +- Type hints may make code unreadable, especially when a parameter's input takes multiple data types and we list them all, +- Writing type hints for simple scripts and functions that perform obvious operations doesn't make sense. + +Fortunately for us, type hinting is not all black and white. +We can gradually describe the parameters and outputs of some functions but leave others as they are. +Type hinting can be a task for new contributors to get them used to the package structure. +That way, their learning curve about data flow and dependencies between API endpoints will be smoother. + +## Type hints in practice + +Type hinting was introduced with Python 3.5 and is described in [PEP 484](https://peps.python.org/pep-0484/). +**PEP 484** defines the scope of type hinting. Is Python drifting towards compiled languages with this feature? +It is not. Type hints are optional and static. They will stay that way as long as Python is Python. +The power of type hints lies somewhere between docstrings and unit tests, and with it, we can avoid many bugs +throughout development. + +We've seen type hints in the simple example earlier. Let's come back to it and change it slightly: + + +```python +from typing import Dict, List + + +def extent_to_json(ext_obj: List) -> Dict: + """Convert bounds to a shapely geojson like spatial object.""" + ... + +``` + +Here we focus on the new syntax. First, we described the parameter `ext_obj` as the `List` class. How do we do it? +Add a colon after the parameter (variable) and the name of a class that is passed into a function. +It's not over. Do you see that the function definition is extended after the closing parenthesis? +If we want to inform the type checker what the function returns, then we create the arrow sign `->` that points to a returned type, +and after it, we put the function's colon. Our function returns a Python dictionary (`Dict`). 
+ +```{note} +We have imported the classes `List` and `Dict` from the `typing` module, but we may use +the built-in `list` or `dict` classes instead. We will achieve the same result. +The capitalized `typing` classes are required when our package supports Python versions that are lower than +Python 3.9. Python 3.7 reaches end of life in June 2023, and Python 3.8 in October 2024. +Thus, if your package supports the whole ecosystem, it should use the `typing` module syntax. +``` + +### Type hints: basic example + +The best way to learn is by example. We will use the [pystiche](https://github.com/pystiche/pystiche/tree/main) package. +To avoid confusion, we start with a mathematical operation: + +```python +import torch + + +def _norm(x: torch.Tensor, dim: int = 1, eps: float = 1e-8) -> torch.Tensor: + ... + +``` + +The function has three parameters: + +- `x` that is required, and its type is `torch.Tensor`, +- `dim`, optional `int` with a default value equal to `1`, +- `eps`, optional `float` with a default value equal to `1e-8`. + +As we see, we can use basic data types to mark simple variables. The basic set of those types is: + +- `int`, +- `float`, +- `str`, +- `bool`, +- `complex`. + +We will most frequently use those types within simple functions that are *close to data*. +However, sometimes our variable will be a data structure that isn't built-in within Python itself +but comes from other packages: + +- `Tensor` from `pytorch`, +- `ndarray` from `numpy`, +- `DataFrame` from `pandas`, +- `Session` from `requests`. + +To perform type checking, we must import those classes. Then we can set those as a parameter's type. +The same is true if we want to use classes from within our package (but we should avoid **circular imports**, +the topic we will uncover later). + +### Type hints: complex data types + +We can use type hints to describe other objects available in Python. 
+A small sample of those objects is: + +- `List` (= `list`) +- `Dict` (= `dict`) +- `Tuple` (= `tuple`) +- `Set` (= `set`) + +How do `pystiche` developers use those objects in their code? Let's take a look at the example below: + +```python +from typing import List, Optional +import torch + + +def _extract_prev(self, idx: int, idcs: List[int]) -> Optional[str]: + ... + +``` + +The function has two parameters (besides `self`). The parameter `idcs` is a list of integers. We may write it as `List[int]` or `List` without +square brackets and data type that is within a list. + +The `_extract_prev` function returns the `Optional` type. It is a special type that describes inputs and outputs +that can be `None`. There are more interesting types that we can use in our code: + +- `Union` – we can use it to describe a variable of multiple types. An example could be: + +```python +from typing import List, Union +import numpy as np +import pandas as pd + + +def process_data(data: Union[np.ndarray, pd.DataFrame, List]) -> np.ndarray: + ... + +``` + +What's the problem with the example above? The function definition becomes unreadable with more data types passed into the parameter `data`. +We have two solutions for this issue. The first one is to use the `Any` type, which is a wildcard that is equal to not passing any type. + +```python +from typing import Any + + +def process_data(data: Any) -> np.ndarray: + ... + +``` + +The second solution is to think about what the high-level representation of the passed data type is. The examples are: + +- `Sequence` – we can use it to describe a variable as a sequence of elements. Sequences are `list`, `tuple`, `range` and `str`. +- `Iterable` – we can use it to describe an iterable variable. Iterables are `list`, `tuple`, `range`, `str`, `dict` and `set`. +- `Mapping` – we can use it to describe a variable that is a mapping. Mappings are `dict` and `defaultdict`. +- `Hashable` – we can use it to describe a hashable variable. 
Hashables are `int`, `float`, `str`, `tuple` and `frozenset`. +- `Collection` - we can use it to describe a collection variable. Collections are `list`, `tuple`, `range`, `str`, `dict`, `set` and `frozenset`. + +Thus, the function could look like this: + +```python +from typing import Iterable + + +def process_data(data: Iterable) -> np.ndarray: + ... + +``` + +### Type hints: unique objects and interesting cases + +The `typing` module provides us with more objects that we can use to describe our variables. +An interesting object is `Callable` that we can use to describe a variable that is a function. Usually, +when we write decorators or wrappers, we use the `Callable` type. The example in the context of the `pystiche` package: + +```python +from typing import Callable + + +def _deprecate(fn: Callable) -> Callable: + ... + +``` + +The `Callable`can be used as a single word or a word with square brackets with two parameters: `Callable[[arg1, arg2], return_type]`. +The first parameter is a list of arguments, and the second is a function output's data type. + +There is an important case around type hints. Sometimes we want to describe a variable that comes from within +our package. Usually, we can do it without problems: + +```python +from my_package import my_data_class + + +def my_function(data: my_data_class) -> None: + ... + +``` + +And it will work fine. But we may encounter *circular imports* that need to be fixed. What is a *circular import*? +It is a case when we want to import module B into module A, but module A is already imported into module B. +We are importing the same module into itself. The issue is rare when we program without type +hinting. However, with type hints, it could be tedious. + +Thus, if you encounter this error: + +```python +from my_package import my_data_class + + +def my_function(data: my_data_class) -> None: + ... 
+ +``` + +```shell +ImportError: cannot import name 'my_data_class' from partially initialized module 'my_package' (most likely due to a circular import) (/home/user/my_package/__init__.py) +``` + +Then you should use the `typing.TYPE_CHECKING` clause to avoid circular imports. The example: + +```python +from __future__ import annotations +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from my_package import my_data_class + + +def my_function(data: my_data_class) -> None: + ... + +``` + +Unfortunately, the solution is *dirty* because we have to +use the `if TYPE_CHECKING` clause and `from __future__ import annotations` import to make it work. It makes our +script messier! Type hinting is not all roses and butterflies! + +The nice feature of type hinting is that we can define a variable's type within a function: + +```python +from typing import Dict +import numpy as np + + +def validate_model_input(data: np.ndarray) -> Dict: + """ + Function checks if dataset has enough records to perform modeling. + + Parameters + ---------- + data : np.ndarray + Input data. + + Returns + ------- + : Dict + Dictionary with `data`, `info` and `status` to decide if pipeline can proceed with modeling. + """ + + output: Dict = None # type hinting + + # Probably we don't have the lines below yet + + # if data.shape[0] > 50: + # output = {"data": data, "info": "Dataset is big enough for statistical tests.", "status": True} + # else: + # output = {"data": data, "info": "Dataset is too small for statistical tests.", "status": False} + + return output + +``` + +We will use this feature rarely. The most probable scenario is when we start defining a function and its output, but +we don't know how we will process data. In this context, we can still run type checking to be sure that the +function behaves as we expect within the newly designed pipeline. + +(Another scenario: we will be forced to add type hints to silence dynamic type checkers from some IDEs ;) ). 
+ + +### Type hinting: final remarks + +There are tools designed for static type checking. The most popular one is [`mypy`](https://mypy.readthedocs.io/en/stable/). +Adding it to your Continuous Integration (CI) pipeline is a good idea. +Other tools are integrated with popular IDEs like `PyCharm` or `VSCode`; most are based on `mypy` logic. + +The last thing to remember is that **type hints are optional in all our functions, and we can introduce them gradually, +which won't damage our code and output generated by CI type checking tools**. +It is a very convenient way of using this extraordinary feature! diff --git a/images/python-package-tools-2022-survey-pypa.png b/images/python-package-tools-2022-survey-pypa.png new file mode 100644 index 00000000..c3d7d4f8 Binary files /dev/null and b/images/python-package-tools-2022-survey-pypa.png differ diff --git a/images/python-package-tools-decision-tree.png b/images/python-package-tools-decision-tree.png new file mode 100644 index 00000000..8908e9cf Binary files /dev/null and b/images/python-package-tools-decision-tree.png differ diff --git a/images/python-package-tools-decision-tree.svg b/images/python-package-tools-decision-tree.svg new file mode 100644 index 00000000..f9c41ac9 --- /dev/null +++ b/images/python-package-tools-decision-tree.svg @@ -0,0 +1,18546 @@ + + + +Do Do I need +environment lock files?Do I want matrixenvironmentsupport for localtesting?Do I need to use aspecific buildback-end?Is my package +pure Python?Does my package +have a few C/C++ extensions?Do I need lock files?Useanytool!Hatch,PDM,**BuildChoices:PDM**BuildFront-endChoices:HatchlingScikit-BuildMeson-PythonSetuptoolsHatchlingScikit-BuildMeson-PythonSetuptoolsBackend Choices:Backend Choices:Anytool!PDMPoetryPDMHatchChoices:Use:Use:Use:PDMUse:Choices:PDM, +Hatch, +Poetry, +** BuildPick a Packaging ToolPure Python packages can use any back-end. 
+Consider starting with a default back-end for the tool that you select (ie use pdm-back-end with the PDM front-end ** Build is a front-end tool that is ONLY used to create your package's sdist and Wheel distributions.Does my package +have many C/C++ extensions & wrap other languages +(e.g. fortran)?YESYESYESNOYESYESYesNOYESNONOpyOpenSci diff --git a/index.md b/index.md index 654054b4..61eb5cbc 100644 --- a/index.md +++ b/index.md @@ -1,18 +1,19 @@ # pyOpenSci Python Open Source Package Development Guide - ```{toctree} :hidden: :caption: Documentation Documentation + ``` ```{toctree} :hidden: :caption: Packaging -Packaging +Packaging + ``` ```{toctree} @@ -28,7 +29,8 @@ https://github.com/pyOpenSci/python-package-guide/community --> ## Welcome, Python open source enthusiast! Here you will find guidelines for what we look for in your scientific -Python package when reviewing. You will also find best practice recommendations and curated lists of community resources surrounding packaging and documentation. +Python package when reviewing. You will also find best practice recommendations and curated lists of community resources surrounding packaging and documentation. Our goal is to help the +community make decisions around how to create scientific Python packages. We are working towards a shared vision of packaging that helps users better understand where to start. ::::{grid} 2 :reverse: @@ -43,7 +45,6 @@ Python package when reviewing. 
You will also find best practice recommendations :columns: 8 :class: sd-fs-3 - ```{button-link} https://www.pyopensci.org/about-peer-review/ :color: primary :class: sd-rounded-pill float-left @@ -59,7 +60,6 @@ Learn about our open peer review process ::: :::: - ::::{grid} 1 1 2 2 @@ -69,7 +69,6 @@ Learn about our open peer review process :::{grid-item-card} :link: documentation/index :link-type: doc -:class-header: bg-light ✨ Documentation Criteria & Recommendations ✨ ^^^ @@ -80,11 +79,18 @@ documentation that are commonly used in the scientific Python community. ::: +:::{grid-item-card} +:link: package-structure-code/intro +:link-type: doc + +✨ Python packaging tools & structure ✨ +^^^ +All of the modern tools discussed in this guide will help you build an efficient packaging workflow. This section helps you select the tool that will work best for you. +::: :::{grid-item-card} :link: CONTRIBUTING :link-type: doc -:class-header: bg-light ✨ Want to contribute? ✨ ^^^ @@ -94,11 +100,12 @@ contribute. :::: ## Who this guidebook is for + We assume that you are here because you are: +1. Looking for guidance on creating a Python package. +1. Looking for resources associated with Python packaging. 1. Considering submitting a package to pyOpenSci and want to understand what we are looking for when we review your package -2. Looking for guidance on creating a Python package. -3. Looking for resources associated with Python packaging. Well, friend, you've come to the right place! @@ -106,9 +113,9 @@ Well, friend, you've come to the right place! This guidebook contains: -* Explanation for "Good enough" minimum requirements associated with being reviewed by pyOpenSci -* Explanation of better and best practices in case you want to set the bar higher for your package (which we hope you will)! -* A curated list of resources to help you get your package into documented, usable and tested shape. 
+- Explanation for "Good enough" minimum requirements associated with being reviewed by pyOpenSci +- Explanation of better and best practices in case you want to set the bar higher for your package (which we hope you will)! +- A curated list of resources to help you get your package into documented, usable and tested shape. ## Where this guide is headed @@ -118,38 +125,3 @@ Good meets the requirements. Going beyond the minimum can make package maintenan This guide is now a work in progress. If you have ideas of things you'd like to see here, [we invite you to open an issue on GitHub that details any changes or additions that you'd like to see.](https://github.com/pyOpenSci/python-package-guide/issues). - - - - - diff --git a/package-structure-code/complex-python-package-builds.md b/package-structure-code/complex-python-package-builds.md new file mode 100644 index 00000000..58183c4e --- /dev/null +++ b/package-structure-code/complex-python-package-builds.md @@ -0,0 +1,19 @@ +# Complex Python package builds + +This guide is focused on packages that are either pure-python or that +have a few simple extensions in another language such as C or C++. + +In the future, we want to provide resources for packaging workflows that require more complex builds. If you have questions about these types of package, please [add a question to our discourse](https://pyopensci.discourse.group/) or open an [issue about this guide specifically in the GitHub repo for this guide](https://github.com/pyOpenSci/python-package-guide/issues). There are many nuances to building and distributing Python packages that have compiled extensions requiring non-Python dependencies at build time. For an overview and thorough discussion of these nuances, please see [this site.](https://pypackaging-native.github.io/) + +## Pure Python Packages vs. packages with extensions in other languages + +You can classify Python package complexity into three general categories. 
These +categories can in turn help you select the correct package front-end and +back-end tools. + +1. **Pure-python packages:** these are packages that only rely on Python to function. Building a pure Python package is simpler. As such, you can choose a tool below that has the features that you want and be done with your decision! + +2. **Python packages with non-Python extensions:** These packages have additional components called extensions written in other languages (such as C or C++). If you have a package with non-python extensions, then you need to select a build back-end tool that allows you to add additional build steps needed to compile your extension code. Further, if you wish to use a front-end tool to support your workflow, you will need to select a tool that supports additional build setups. In this case, you could use setuptools. However, we suggest that you choose a build tool that supports custom build steps such as Hatch with Hatchling or PDM. PDM is an excellent choice as it allows you to also select your build back-end of choice. We will discuss this at a high level on the complex builds page. + +3. **Python packages that have extensions written in different languages (e.g. Fortran and C++) or that have non Python dependencies that are difficult to install (e.g. GDAL):** These packages often have complex build steps (more complex than a package with just a few C extensions for instance). As such, these packages require tools such as [scikit-build](https://scikit-build.readthedocs.io/en/latest/) + or [meson-python](https://mesonbuild.com/Python-module.html) to build. NOTE: you can use meson-python with PDM. diff --git a/package-structure-code/intro.md b/package-structure-code/intro.md new file mode 100644 index 00000000..e014f7ff --- /dev/null +++ b/package-structure-code/intro.md @@ -0,0 +1,82 @@ +# Python package structure information + +This section provides guidance on your Python package's structure, code formats and style. 
It also reviews the various packaging tools that you can use to +support building and publishing your package. + +If you are confused by Python packaging, you are not alone! +The good news is there are some great modern packaging +tools that ensure that you're following best practices. Here, we +review tool features and suggest tools that might be best fitted for your workflow. + +:::{figure-md} fig-target + +Figure showing a decision tree with the various packaging tool front end and back end options. + +Diagram showing the various from end build tools that you can select from. See the packaging tools page to learn more about each tool. +::: + +```{note} +If you are considering submitting a package for peer review, have a look at the +bare-minimum [editor checks](https://www.pyopensci.org/software-peer-review/how-to/editor-in-chief-guide.html#editor-checklist-template) that pyOpenSci +performs before a review begins. These checks are useful to explore +for both authors planning to submit a package to us for review and for +anyone who is just getting started with creating a Python package. + +``` + +## What you will learn here + +In this section of our Python packaging guide, we: + +- Provide an overview of the options available to you when packaging your tool +- Suggest tools and approaches that both meet your needs and also support existing standards. +- Suggest tools and approaches that will allow you to expand upon a workflow that may begin as a pure Python tool and evolve into a tool that requires addition layers of complexity in the packaging build. +- Align our suggestions with the most current, accepted + [PEPs (Python Enhancement Protocols)](https://peps.python.org/pep-0000/) and the [scientific-python community SPECs](https://scientific-python.org/specs/). 
+- In an effort to maintain consistency within our community, we also align with existing best practices being implemented by developers of core Scientific Python packages such as Numpy, SciPy and others. + +## Guidelines for pyOpenSci's packaging recommendations + + + +The flexibility of the Python programming language lends itself to a diverse +range of tool options for creating a Python package. Python is so flexible that +it is one of the few languages that can be used to wrap around other languages. The ability of Python to wrap other languages one the reasons why you will often hear Python described as a ["glue" language](https://numpy.org/doc/stable/user/c-info.python-as-glue.html)" + +If you are building a pure Python package, then your packaging setup can be +simple. However, some scientific packages have complex requirements as they may +need to support extensions or tools written in other languages such as C or C++. + +To support the many different uses of Python, there are many ways to create a +Python package. In this guide, we suggest approaches for packaging approaches and tools based +upon: + +1. What we think will be best and easiest to adopt for those who are newer to packaging +2. Tools that we think are well maintained and documented. +3. A shared goal of standardizing packaging approaches across this (scientific) Python ecosystem. + +Here, we also try to align our suggestions with the most current, accepted +[Python community](https://packaging.python.org/en/latest/) and [scientific community](https://scientific-python.org/specs/). + +```{admonition} Suggestions in this guide are not pyOpenSci review requirements +:class: important + +The suggestions for package layout in this section are made with the +intent of being helpful; they are not specific requirements for your +package to be reviewed and accepted into our pyOpenSci open source ecosystem. 
+ +Please check out our [package scope page](https://www.pyopensci.org/software-peer-review/about/package-scope.html) and [review requirements in our author guide](https://www.pyopensci.org/software-peer-review/how-to/author-guide.html#) if you are looking for pyOpenSci's Python package review requirements! +``` + +```{toctree} +:hidden: +:caption: Package structure & code style + +Intro + +Python package structure +pyproject.toml Package Metadata +What are SDist & Wheel Files? +Package Build Tools +Complex Builds +``` diff --git a/package-structure-code/pyproject-toml-python-package-metadata.md b/package-structure-code/pyproject-toml-python-package-metadata.md new file mode 100644 index 00000000..7a31f2a6 --- /dev/null +++ b/package-structure-code/pyproject-toml-python-package-metadata.md @@ -0,0 +1,104 @@ +# Use a pyproject.toml file for your package configuration & metadata + +The standard file that Python packages use to [specify build requirements and +metadata is called a **pyproject.toml**](https://packaging.python.org/en/latest/specifications/declaring-project-metadata/). Adding metadata, build requirements +and package dependencies to a **pyproject.toml** file replaces storing that +information in a setup.py or setup.cfg file. + +The **pyproject.toml** file is written in [TOML (Tom's Obvious, Minimal Language) format](https://toml.io/en/). TOML is an easy-to-read structure that is founded on key/value pairs. Each section in the **pyproject.toml** file contains a `[table identifier]`. +Below that table identifier are key/value pairs that +support configuration for that particular table. + +### Benefits of using a pyproject.toml file + +Including your package's metadata in a separate human-readable **pyproject.toml** +format also allows someone to view the project's metadata in a GitHub repository. 
+ + + +```{admonition} Setup.py is still useful for complex package builds +:class: tip + +Using **setup.py** to manage package builds and metadata [can cause problems with package development](https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html). +In some cases where a Python package build is complex, a **setup.py** file may +be required. While this guide will not cover complex builds, we will provide +resources for working with complex builds in the future. + +``` + +### Example pyproject.toml for building using PDM + +Below is an example build configuration for a Python project. This example +package setup uses: + +- **pdm.backend** to build the [package's sdist and wheels](python-package-distribution-files-sdist-wheel) + +``` +[build-system] +requires = ["pdm-backend>=1.0.0"] +build-backend = "pdm.backend" + +[project] +name = "examplePy" +authors = [ + {name = "Some Maintainer", email = "some-email@pyopensci.org"} +] +maintainers = [{name = "All the contributors"}] +license = {text = "BSD 3-Clause"} +description = "An example Python package used to support Python packaging tutorials" +keywords = ["pyOpenSci", "python packaging"] +readme = "README.md" + +dependencies = [ + "dependency-package-name-1", + "dependency-package-name-2", +] +``` + +Notice that dependencies are specified in this file. + +### Example pyproject.toml for building using setuptools + +The package metadata including authors, keywords, etc is also easy to read. +Below you can see the same TOML file that uses a different build system (setuptools). +Notice how simple it is to swap out the tools needed to build this package! + +In this example package setup you use: + +- **setuptools** to build the [package's sdist and wheels](python-package-distribution-files-sdist-wheel) +- **setuptools_scm** to manage package version updates using version control tags + +In the example below `[build-system]` is the first table +of values. 
It has two keys that specify the build requirements and the build back-end for a package: + +1. `requires =` +1. `build-backend =` + +``` +[build-system] +requires = ["setuptools>=45"] +build-backend = "setuptools.build_meta" + +[project] +name = "examplePy" +authors = [ + {name = "Some Maintainer", email = "some-email@pyopensci.org"} +] +maintainers = [{name = "All the contributors"}] +license = {text = "BSD 3-Clause"} +description = "An example Python package used to support Python packaging tutorials" +keywords = ["pyOpenSci", "python packaging"] +readme = "README.md" + +dependencies = [ + "dependency-package-name-1", + "dependency-package-name-2", +] +``` + +```{note} +[Click here to read about our packaging build tools including PDM, setuptools, Poetry and Hatch.](/package-structure-code/python-package-build-tools) +``` diff --git a/package-structure-code/python-package-build-tools.md b/package-structure-code/python-package-build-tools.md new file mode 100644 index 00000000..ace87d5f --- /dev/null +++ b/package-structure-code/python-package-build-tools.md @@ -0,0 +1,482 @@ +# Python Package Build Tools + + + +There are several different build tools that you can use to [create your Python package's _sdist_ and _wheel_ distributions](python-package-distribution-files-sdist-wheel). Below, we discuss the features, +benefits and limitations of the most commonly used Python packaging tools. +We focus on pure-python packages in this guide. However, we also +highlight tools that currently support packages with C/C++ and other language +extensions. + +:::{figure-md} fig-target + +Decision tree diagram showing the various front and back end packaging tools. You can decide what packaging tool to use by thinking about what features you need. PDM is currently the most flexible tool that also supports both non pure Python projects and also using different build back-ends. As such currently PDM is the tool we think beginners might appreciate most with Poetry being a close second.
Poetry is ideal for pure python projects. + +Diagram showing the various front-end build tools that you can select from. Each tool has different features as highlighted below. +NOTE: this is still a DRAFT so I'm not going to spend time truly cleaning it up until I get lots of feedback on the general approach!! +::: + +If you want to know more about Python packages that have extensions written in +other languages, [check out the page on complex package builds.](complex-python-package-builds) + +### Tools that we review here + +In this section we have selected tools that were returned +as the most popular packaging tools in the PyPA survey. +You will learn more about the following tools on this page: + +- [Twine](https://twine.readthedocs.io/en/stable/), [Build](https://pypa-build.readthedocs.io/en/stable/) + [setuptools](https://setuptools.pypa.io/en/latest/) +- [Flit](https://flit.pypa.io/en/stable/) +- [Hatch](https://hatch.pypa.io/latest/) +- [PDM](https://pdm.fming.dev/latest/) +- [Poetry](https://python-poetry.org/docs/) + +### Summary of tools Hatch vs. PDM vs. Poetry (and setuptools) + +If you are looking for a quick summary, read below. + +- In general, any modern tool that you select from this page will be great to build your package. Selecting a tool comes down to the features that you are looking for in your workflow. +- We suggest that beginners start with a modern workflow tool like PDM as opposed to navigating the complexities of setuptools. +- If you are going to use Poetry (it is the most popular tool and does have the best documentation) beware of the upper bounds dependency additions and consider overriding dependencies when you add them. If you do that, Poetry will work well for pure-python builds! Poetry also has an active Discord where you can ask questions. + +Below are some features that Hatch and PDM offer that Poetry does not. + +PDM: + +- Supports other back-ends making it ideal for builds that are not pure Python.
This means PDM is a great option for both pure python and more complex Python builds as it supports meson-python and other build backends. +- Offers flexibility in dependency management which we like +- Offers lock files if you need them + +Hatch: + +- Offers matrix environment management that allows you to run tests across Python versions. If this feature is important to you, then Hatch is a clear winner. +- Offers a Nox / Make file like tool to streamline your + build workflow. If you are looking to reduce the number of tools in your workflow, Hatch might be for you. + +## Build front-end vs. build back-end tools + +To better understand your options when it comes to building a Python package, it's important to first understand the difference between a +build tool front-end and build back-end. + +### Build back-ends + +Most packaging tools have a back-end +build tool that builds your package and creates associated +[(sdist and wheel) distribution files](python-package-distribution-files-sdist-wheel). Some tools, such as **Flit**, only +support pure-Python package builds. A pure-Python build refers +to a package build that does not have extensions that are written in another +programming language (such as `C` or `C++`). + +Other packages that have C and C++ extensions (or that wrap other languages such as Fortran) require additional code compilation steps when built. +Back-ends such as **setuptools.build**, **meson.build** +and **scikit-build** support complex builds with custom steps. If your +build is particularly complex (i.e. you have more than a few `C`/`C++` +extensions), then we suggest you use **meson.build** or **scikit-build**. + +### Python package build front-ends + +A packaging front-end tool refers to a tool that makes it easier for you to +perform common packaging tasks using similar commands.
These tasks include: + +- [Build your packages (create the sdist and wheel distributions](python-package-distribution-files-sdist-wheel) +- Installing your package in a development mode (so it updates when you update your code) +- Publishing to PyPI +- Running tests +- Building documentation +- Managing an environment or multiple environments in which you need to run tests and develop your package + +There are several Python packaging tools that you can use for pure Python +builds. Each front-end tool discussed below supports a slightly different set of Python +packaging tasks. + +For instance, you can use the packaging tools **Flit**, **Hatch** or **PDM** +to both build and publish your package to PyPI. However while **Hatch** and +**PDM** support versioning and environment management, **Flit** does not. If you want a tool that supports dependency +locking, you can use **PDM** or **Poetry** but not **Hatch**. +If you only need to build your package's sdist and wheel distribution files, then you can stick with PyPA's Build. You'd then use Twine to publish to PyPI. + +```{note} +If you are using **Setuptools**, there is no default user-friendly build front-end that performs multiple tasks. You will need to use **build** to build your package and **twine** to publish to PyPI. +``` + +### Example build steps that can be simplified using a front-end tool + +Below, you can see how a build tool streamlines your packaging experience. Example to build your package with **Hatch**: + +```bash +# Build your sDist and .whl files +hatch build + +# Example to publish to PyPI: +hatch publish --repo test +``` + +Example build steps using the **setuptools** back-end and **build**: + +```bash +# Build the package +python3 -m build + +# Publish to test PyPI using twine +twine upload -r testpypi dist/* +``` + +## Choosing a build back-end + +Most front-end packaging tools have their own back-end build tool. The build +tool creates your package's (sdist and wheel) distribution files. 
For pure +Python packages, the main difference between the different build back-ends +discussed below is: + +- How configurable they are - for example, do they allow you to add build steps that support non python extensions? +- How much you need to configure them to ensure the correct files are included in your sdist and wheel distributions. + +### Build back-end support for non pure-python packages + +It is important to note that some build back-ends, such as **Flit-core**, only support +pure Python builds. Other back-ends support C and C++ extensions as follows: + +- setuptools supports builds using C / C++ extensions +- Hatchling (hatch's back-end) supports C / C++ extensions via plugins that the developer creates to customize a build +- PDM's back-end supports C / C++ extensions by using setuptools +- Poetry's back-end supports C/C++ extensions however this functionality is currently undocumented. As such we don't recommend using Poetry for complex or non pure Python builds until it is documented. + +While we won't discuss more complex builds below, we will identify which tools +have documented support for C / C++ extensions. + +## An ecosystem of Python build tools + +Below we introduce several of the most commonly used Python packaging build +front-end tools. We highlight the features that each tool offers as a way to +help you decide what tool might be best for your workflow. + +```{admonition} We do not suggest using setuptools +:class: note + +We suggest that you pick one of the modern tools listed above rather than +setuptools because setuptools will require some additional knowledge +to set up correctly. + +We review setuptools as a back-end because it is still popular. However it is +not the most user friendly option. +``` + +The most commonly used tools in the ecosystem are +setuptools back-end (with build) and Poetry (a front end tool with numerous +features and excellent documentation). 
+ +:::{figure-md} pypa-survey-plot + +Graph showing the results of the 2022 PyPA survey of Python packaging tools. On the x axis is percent response and on the y axis are the tools. + +The Python developers survey results (n > 8,000 PyPI users) show setuptools and poetry as the most commonly used Python packaging tools. The core tools that we've seen being used in the scientific community are included here. [You can view the full survey results by clicking here.](https://drive.google.com/file/d/1U5d5SiXLVkzDpS0i1dJIA4Hu5Qg704T9/view) NOTE: these data represent maintainers across domains and are likely heavily weighted toward those in web development. So this represents a snapshot across the broader Python ecosystem. +::: + +## Choose a build workflow tool + +The tools that we review below include: + +- Twine, Build + setuptools +- Flit +- Hatch +- PDM +- Poetry + +When you are selecting a tool, you might consider this general workflow of +questions: + +1. **Is your tool pure python? Yes?** You can use any tool that you wish! Pick the tool that has the features that you want to use in your build workflow. We suggest: + +- Flit, Hatch, PDM or Poetry (read below for more) + +1. **Does your tool have a few C or C++ extensions?** Great, we suggest using + **PDM** for the time being. It is the only tool in the list below that has both documented + workflow to support such extensions and support for other back-ends in the case that build hooks are not enough for your workflow. PDM supports other back-ends such as scikit-build and meson-python that will allow you to fully customize your package's build. + +NOTE: You can also use Hatch for non pure python builds. Hatch, similar to PDM, allows you to write your own build hooks or plugins to support custom build steps. But currently, hatch does not support other build back ends. Many of the core scientific packages are moving to meson-python to build their packages.
Thus, we appreciate that PDM can work with meson-python specifically. + +## Python packaging tools summary + +Below, we summarize features offered by the most popular build front-end tools. It is important to keep in mind that these +front-end tools remove the need to use other core tools in your workflow. For example if you use setuptools, you will need to also use Build and Twine to build your package and publish to PyPI. But if you use Poetry, Hatch or PDM you can do all of those things using the same tool (e.g. `hatch build`, `hatch publish` or `pdm build`, `pdm publish`). + +Note that because setuptools does not offer a front-end interface, it is not +included in the table. + +### Package tool features table + +```{csv-table} +:header: Feature, Flit, Hatch, PDM, Poetry +:widths: 36, 10,10,10,10 +:delim: "|" + +Default Build Back-end| Flit-core| hatchling| PDM| Poetry-core +Use Other Build Backends|✖ | ✖|✅ |✖ +Dependency management| ✖|✖|✅|✅ +Publish to PyPI| ✅|✅|✅|✅ +Version Control based versioning (using `git tags`)|✖|✅|✅|✅ +Version bumping|✖|✅| ✅| ✅ +Environment Management|✖|✅| ✅| ✅ +More than one maintainer? (bus factor)|✖|✖| ✖| ✅ +``` + +Notes: + +- _Hatch plans to support using other back-ends and dependency management in the future_ +- Poetry supports semantic versioning. Thus, it will support version bumping following commit messages if you use a tool such as Python Semantic Release + +## PDM + +[PDM is a Python packaging and dependency management tool](https://pdm.fming.dev/latest/). +PDM supports builds for pure Python projects. It also provides multiple layers of +support for projects that have C and C++ extensions. + +```{admonition} PDM support for C and C++ extensions + +PDM supports using the PDM-back-end and setuptools at the same time. +This means that you can run setuptools to compile and build C extensions.
+PDM's build back-end receives the compiled extension files (.so, .pyd) and +packages them with the pure Python files. +``` + +### PDM Features + +```{csv-table} +:header: Feature, PDM, Notes +:widths: 20,5,50 +:delim: "|" + +Use Other Build Backends| βœ…| When you setup PDM it allows you to select one of several build back ends including: PDM-core, flit-core and hatchling. PDM also can work with Meson-Python which supports move complex python builds. +Dependency specifications |βœ…|PDM has flexible support for managing dependencies. PDM defaults to using an open bound (e.g. `requests >=1.2`) approach to dependencies. However you can [customize how you want to add dependencies in case you prefer another approach such as that of Poetry which uses an upper bound limit](https://pdm.fming.dev/latest/usage/dependency/#about-update-strategy).** +Environment lock files |βœ…|PDM and Poetry are currently the only tools that create environment lock files. Lock files are often most useful to developers creating web apps where locking the environment is critical for consistent user experience. For community-used packages, you will likely never want to use a lock file. +Environment management |βœ… | PDM provides environment management support. It supports Python virtual environments, conda and a local `__pypackages__` environment which is a newer option in the Python ecosystem. No extensions are needed for this support. +Select your environment type on install |βœ… | When you run `PDM init`, PDM will discover environments that are already on your system and allow you to select one to use for your project. +Publish to PyPI|βœ…|PDM supports publishing to both test PyPI and PyPI +Version Control based versioning|βœ… | PDM has a setuptools_scm like tool built into it which allows you to use dynamic versioning that rely on git tags. 
+Version bumping| βœ… | PDM supports you bumping the version of your package using standard semantic version terms patch; minor; major +Follows current packaging standards|βœ…|PDM supports current packaging standards for adding metadata to the **pyproject.toml** file. +Install your package in editable mode|βœ…|PDM supports installing your package in editable mode. +Build your sdist and wheel distributions|βœ…| Similar to all of the other tools PDM builds your packages sdist and wheel files for you. +✨Optional use of PEP 582 / local environment directory✨|βœ…| PDM is currently the only tool that optionally supports PEP 582 (having a local environment configuration stored within a `__pypackages__` directory in your working directory). +``` + +```{admonition} PDM vs. Poetry +The functionality of PDM is similar to Poetry. However, PDM also offers +additional, documented support for C extensions and version control based +versioning. As such, PDM is preferred for those working on non pure-Python packages. + +If you are deciding between the Poetry and PDM, a smaller difference is the default way that dependencies are added to your pyproject.toml file. + +* Poetry by default follows strict semantic versioning adding dependencies to your pyproject.toml file [using an upper bounds constraint (`^`)](https://python-poetry.org/docs/dependency-specification/#version-constraints). Upper bounds lock means that Poetry will never bump a dependency to the next major version (i.e. from 1.2 to 2.0). However, you can tell Poetry to use an open bound approach by explicitly adding the package like this: `poetry add requests >= 1.2` rather than just using `poetry add requests` which will result in a upper bound locked (ie Upper bound locks means that requests 2.0 could never be installed even if it came out and your package could benefit from it). +* PDM defaults to open-bounds (`>=`) dependency additions which is the preferred approach in the scientific python ecosystem. 
However, PDM also allows you to specify the way dependencies are added by default. As such, you can also specify upper-bounds (`^`) using PDM if you require that approach. + +Finally there are some nuanced differences in how both tools create lock files which we will not go into detail about here. +``` + +### Challenges with PDM + +PDM is a full-featured packaging tool. However it is not without challenges: + +- Its documentation can be confusing, especially if you are new to + packaging. For example, PDM doesn't provide an end to end beginning workflow in its documentation. +- PDM also only has one maintainer currently. We consider individual maintainer + teams to be a potential risk. If the maintainer finds they no longer have time + to work on the project, it leaves users with a gap in support. Hatch and Flit + also have single maintainer teams. + +[You can view an example of a package that uses PDM here](https://github.com/pyOpenSci/examplePy/tree/main/example4_pdm). The README file for this directory provides you with +an overview of what the PDM command line interface looks like when you use it. + +## Flit + +[Flit is a no-frills, streamlined packaging tool](https://flit.pypa.io/en/stable/) that supports modern Python packaging standards. +Flit is a great choice if you are +building a basic package to use in a local workflow that doesn't require any advanced features, and if your package structure is already created. More on that below. + +### Flit Features + +```{csv-table} +:header: Feature, Flit, Notes +:widths: 20,5,50 +:delim: "|" + +Publish to PyPI and test PyPI|✅|Flit supports publishing to both test PyPI and PyPI +Helps you add metadata to your **pyproject.toml** file|✅| Flit does support adding metadata to your **pyproject.toml** file following modern packaging standards. +Follows current packaging standards|✅|Flit supports current packaging standards for adding metadata to the **pyproject.toml** file.
+Install your package in editable mode|βœ…| Flit supports installing your package in editable mode.** +Build your sdist and wheel distributions|βœ…| Flit can be used to build your packages sdist and wheel distributions. +``` + +\*\* NOTE: _If you are using the most current version of pip, it supports both a symlink approach `flit install -s` and `pip install -e .`_ + +```{admonition} Learn more about flit +* [Why use flit?](https://flit.pypa.io/en/stable/rationale.html) +``` + +### Why you might not want to use Flit + +Because Flit is no frills, it is best for basic, quick builds. If you are a +beginner you may want to select Hatch or PDM which will offer you more support +in common operations. + +You may NOT want to use flit if: + +- You want to setup more advanced version tracking and management (using version control for version bumping) +- You want a tool that handles dependency versions (use PDM or Poetry instead) +- You have a project that is not pure Python (Use Hatch, PDM or setuptools) +- You want environment management (use PDM, Hatch or Poetry) + +## Hatch + +[**Hatch**](https://hatch.pypa.io/latest/), similar to Poetry and PDM, provides a +unified command line interface. To separate Hatch from Poetry and PDM, it also +provides an environment manager for testing that will make it easier for +you to run tests locally across different versions of Python. It also offers a +nox / makefile like feature that allows you to create custom build workflows such +as building your documentation locally. This means that you could potentially drop a tool like **Make** or **Nox** from your workflow and use Hatch instead. + +### Hatch features + +```{csv-table} +:header: Feature, Hatch, Notes +:widths: 20,5,50 +:delim: "|" + +Use Other Build Backends|βœ–| Switching out build back-ends is not currently an option with Hatch. However, this feature is planned for a future release. +Dependency management|βœ–| Currently you have to add dependencies manually with Hatch. 
However a feature to support dependencies management may be added in a future release. +Environment Management |βœ… | Hatch supports Python virtual environments. If you wish to use other types of environments such as Conda, you will need to [install a plugin such as hatch-conda for conda support](https://github.com/OldGrumpyViking/hatch-conda). +Publish to PyPI and test PyPI|βœ…|Hatch supports publishing to both test PyPI and PyPI +Version Control based versioning|βœ… | Hatch offers `hatch_vcs` which is a plugin that uses setuptools_scm to support versioning using git tags. The workflow with `hatch_vcs` is the same as that with `setuptools_scm`. +Version bumping| βœ… | Hatch supports you bumping the version of your package using standard semantic version terms patch; minor; major +Follows current packaging standards|βœ…|Hatch supports current packaging standards for adding metadata to the **pyproject.toml** file. +Install your package in editable mode|βœ–βœ…| You can install your package in editable mode using `pip install -e .` Hatch mentions [editable installs](https://hatch.pypa.io/latest/config/build/#dev-mode) but refers to pip in its documentation. +Build your sdist and wheel distributions|βœ…| Hatch will build the sdist and wheel distributions +✨Matrix environment creation to support testing across Python versions✨|βœ…| The matrix environment creation is a feature that is unique to Hatch in the packaging ecosystem. This feature is useful if you wish to test your package locally across Python versions (instead of using a tool such as tox). +✨[Nox / MAKEFILE like functionality](https://hatch.pypa.io/latest/environment/#selection)✨| βœ…| This feature is also unique to Hatch. This functionality allows you to create workflows in the **pyproject.toml** configuration to do things like serve docs locally and clean your package build directory. This means you may have one less tool in your build workflow. 
+✨A flexible build back-end: **hatchling**✨| ✅| **The hatchling build back-end offered by the maintainer of Hatch allows developers to easily build plugins to support custom build steps when packaging. + +``` + +_\*\* There is some argument about this approach placing a burden on maintainers to create a custom build system. But others appreciate the flexibility. The Hatch build hook approach is also comparable with the features offered by PDM._ + +### Why you might not want to use Hatch + +There are a few features that Hatch is missing that may be important for some. +These include: + +- Hatch doesn't support adding dependencies. You will have to add them manually. +- Hatch currently doesn't support use with other build back-ends. Lack of support for other build back-ends makes Hatch less desirable for users with more complex package builds. If your package is pure Python, then this won't be an issue. +- Hatch won't by default recognize Conda environments without a plugin. +- Similar to PDM, Hatch's documentation can be difficult to work through, particularly if you are just getting started with creating a package. +- Hatch, similar to PDM and Flit, currently only has one maintainer. + +## Poetry + +[Poetry is a full-featured build tool.](https://python-poetry.org/) It is also +the second most popular front-end packaging tool (based upon the PyPA survey). +Poetry is user-friendly and has clean and easy-to-read documentation. + +```{note} +While some have used Poetry for Python builds with C/C++ extensions, this support +is currently undocumented. Thus, we don't recommend using Poetry for more complex builds. +``` + +### Poetry features + +```{csv-table} +:header: Feature, Poetry, Notes +:widths: 20,5,50 +:delim: "|" + +Add dependencies to your pyproject.toml file |✅|Poetry helps you add dependencies to your `pyproject.toml` metadata.
_NOTE: currently Poetry adds dependencies using an approach that is slightly out of alignment with current Python peps - however there is a plan to fix this in an upcoming release._ Poetry also allows you to organize dependencies in groups such as documentation, packaging and tests. +Dependency specification |βœ… |Poetry allows you to be specific about version of dependencies that you add to your package's pyproject.toml file. However, it's default upper bound approach can be problematic for some packages (We suggest you override the default setting when adding dependencies). Read below for more. +Environment management |βœ… | Poetry allows you to either use its built in environment or you can select the environment type that you want to use for managing your package. [Read more about its built in environment management options](https://python-poetry.org/docs/basic-usage/#using-your-virtual-environment). +Lock files| βœ… | Poetry creates a **poetry.lock** file that you can use if you need a lock file for your build. +Publish to PyPI and test PyPI|βœ…|Poetry supports publishing to both test PyPI and PyPI +Version Control based versioning|βœ… | The plugin [Poetry dynamic versioning](https://github.com/mtkennerly/poetry-dynamic-versioning) supports versioning using git tags with Poetry. +Version bumping| βœ… | Poetry supports you bumping the version of your package using standard semantic version terms patch; minor; major +Follows current packaging standards|βœ–βœ…|Poetry does not quite support current packaging standards for adding metadata to the **pyproject.toml** file but plans to fix this in an upcoming release. 
+Install your package in editable mode|✅|Poetry supports installing your package in editable mode using `--editable` +Build your sdist and wheel distributions|✅|Poetry will build your sdist and wheel distributions using `poetry build` +``` + + + +### Challenges with Poetry + +Some challenges of Poetry include: + +- Poetry, by default, pins dependencies using an "upper bound" limit specified with the `^` symbol. However, this behavior can be overridden by specifying the dependency when you use `poetry add` as follows: `poetry add "requests>=2.1"`. See breakout below for more discussion on issues surrounding upper-bounds pinning. +- _Minor Challenge:_ The way Poetry currently adds metadata to your pyproject.toml file does not follow current Python standards. However, this is going to be addressed with Poetry release version 2.0. + +Poetry is an excellent tool. Use caution when using it to pin dependencies as +Poetry's approach to pinning can be problematic for many builds. If you use Poetry, we strongly suggest that you override the default upper bound dependency option. + + + +```{admonition} Challenges with Poetry dependency pinning +:class: important + +By default, Poetry pins dependencies using `^`. This `^` symbol means that there is +an "upper bound" to the dependency. Thus Poetry will not bump a dependency +version to a new major version. For example, if your package uses a dependency that +is at version 1.2.3, Poetry will never bump the dependency to 2.0 even if +there is a new major version of the package. Poetry will instead bump up to 1.9.x. + +Poetry does this because it adheres to strict semantic versioning which states +that a major version bump (from 1.0 to 2.0 for example) means there are breaking +changes in the tool. However, not all tools follow strict semantic versioning.
+[This approach has been found to be problematic by many of our core scientific packages.](https://iscinumpy.dev/post/bound-version-constraints/) + +This approach also won't support other ways of versioning tools, for instance, +some tools use [calver](https://calver.org/) which creates new versions based on the date. +``` + +## Using Setuptools Back-end for Python Packaging with Build Front-end + +[Setuptools](https://setuptools.pypa.io/en/latest/) is the most +mature Python packaging build tool with [development dating back to 2009 and earlier](https://setuptools.pypa.io/en/latest/history.html#). +Setuptools also has the largest number of community users (according to the PyPA +survey). Setuptools does not offer a user +front-end like Flit, Poetry and Hatch offer. As such you will need to use other +tools such as **build** to create +your package distributions and **twine** to publish to PyPI. + +While setuptools is the most commonly used tool, we encourage package maintainers +to consider using a more modern tool for packaging such as Poetry, Hatch or PDM. + +We discuss setuptools here because it's commonly found in the ecosystem and +contributors may benefit from understanding it. + +### Setuptools Features + +Some of the features of setuptools include: + +- Fully customizable build workflow +- Many scientific Python packages use it. +- It offers version control based package versioning using **setuptools_scm** +- It supports modern packaging using **pyproject.toml** for metadata +- Supports backwards compatibility for older packaging approaches. + +### Challenges using setuptools + + + +Setuptools has a few challenges: + +- Setuptools does not support interactive features such as auto / tab completion by default if you are working in an IDE like VS Code and using an editable install for development. [See notes here about Pylance support](https://github.com/microsoft/pylance-release/blob/main/TROUBLESHOOTING.md#editable-install-modules-not-found).
In comparison, tools such as Flit, Hatch and PDM support interactive features such as tab / auto completion when using an IDE like VS Code or PyCharm (as long as your version of pip is current!). +- Because **setuptools** has to maintain backwards compatibility across a range of packages, it is + not as flexible in its adoption of modern Python packaging + standards. +- The above-mentioned backwards compatibility makes for a more complex code-base. +- Your experience as a user will be less streamlined and simple using setuptools compared to other tools discussed on this page. + +There are also some problematic default settings that users should be aware of +when using setuptools. For instance: + +- setuptools will build a project without a name or version if you are not using a **pyproject.toml** file + to store metadata. +- Setuptools will also include all of the files in your package + repository if you do not explicitly tell it to exclude files using a + **MANIFEST.in** file diff --git a/package-structure-code/python-package-distribution-files-sdist-wheel.md b/package-structure-code/python-package-distribution-files-sdist-wheel.md new file mode 100644 index 00000000..1b4a1071 --- /dev/null +++ b/package-structure-code/python-package-distribution-files-sdist-wheel.md @@ -0,0 +1,170 @@ +# The Python Package Source and Wheel Distributions + +There are two core distribution files +that you need to create to publish your Python package to +PyPI: the source distribution (often called an sdist) and the wheel. The sdist contains the raw source +code for your package. The Wheel (.whl) contains the built / compiled files +that can be directly installed onto anyone's computer. + +Learn more about both distributions below. + +```{note} +If your package is a pure python package with no additional +build / compilation steps then the sdist and Wheel distributions will have +similar content.
However, if your package has extensions in other languages +or is more complex in its build, the two distributions will be very different. + +Also note that we are not discussing conda build workflows in this section. +[You can learn more about conda builds here.](https://conda.io/projects/conda-build/en/latest/user-guide/tutorials/index.html) +``` + +### Source Distribution (sdist) + +**Source files** are the unbuilt files needed to build your +package. These are the "raw / as-is" files that you store on GitHub or whatever +platform you use to manage your code. + +**S**ource **D**istributions are referred to as sdist. As the name implies, an sdist contains the source code; it has not been +built or compiled in any way. Thus, when a user installs your source +distribution using pip, pip needs to run a build step first. For this reason, you could define a source distribution as a compressed archive that contains everything required to build a wheel (except for project dependencies) without network access. + +Sdist is normally stored as a `.tar.gz` archive (often called a "tarball"). Thus, when a user installs your source distribution using pip, pip needs to run a build step first. + +Below is an example sdist for the stravalib Python package: + + + +``` +stravalib-1.1.0.post2-SDist.tar.gz file contents + +├─ 📂 stravalib +│ ├─ tests +│ │ ├─ integration +│ │ │ ├─ __init__.py +│ │ │ ├─ conftest.py +│ │ │ ├─ strava_api_stub.py +│ │ │ └─ test_client.py +│ │ ├─ unit +│ │ │ ├─ __init__.py +│ │ │ ├─ test_attributes.py +│ │ │ ├─ ... +│ │ ├─ __init__.py +│ │ ├─ auth_responder.py +│ │ └─ test.ini-example +│ ├─ util +│ │ ├─ __init__.py +│ │ └─ limiter.py +│ ├─ __init__.py +│ ├─ _version.py +│ ├─ _version_generated.py +│ ├─ attributes.py +│ ├─ ...
+├─ stravalib.egg-info +│ ├─ PKG-INFO +│ ├─ SOURCES.txt +│ ├─ dependency_links.txt +│ ├─ requires.txt +│ └─ top_level.txt +├─ CODE_OF_CONDUCT.md +├─ CONTRIBUTING.md +├─ LICENSE.txt +├─ MANIFEST.in +├─ Makefile +├─ PKG-INFO +├─ README.md +├─ changelog.md +├─ environment.yml +├─ pyproject.toml +├─ requirements-build.txt +├─ requirements.txt +└─ setup.cfg + +``` + +```{admonition} GitHub archive vs SDist +:class: tip +When you make a release on GitHub, it creates a `git archive` that contains all +of the files in your GitHub repository. While these files are similar to an +SDist, these two archives are not the same. The SDist contains a few other +items including a metadata directory and if you use `setuptools_scm` or `hatch_vcs` +the SDist may also contain a file that stores the version. +``` + +### Wheel (.whl files): + +A wheel file is a ZIP-format archive whose filename follows a specific format +(below) and has the extension `.whl`. The `.whl` archive contains a specific +set of files, including metadata that are generated from your project's +pyproject.toml file. The pyproject.toml and other files that may be included in +source distributions are not included in wheels because a wheel is a built +distribution. + +The wheel (.whl) is your built binary distribution. **Binary files** are the built / compiled source files. These files are ready to be installed. A wheel (**.whl**) is a **zip** file containing all of the files needed to directly install your package. All of the files in a wheel are binaries - this means that code is already compiled / built. Wheels are thus faster to install - particularly if you have a package that requires build steps. + +The wheel does not contain any of your +package's configuration files such as **setup.cfg** or **pyproject.toml**. This +distribution is already built so it's ready to install.
+ +Because it is built, the wheel file will be faster to install for pure Python +projects and can lead to consistent installs across machines. + + + +```{tip} +Wheels are also useful in the case that a package +needs a **setup.py** file to support a more complex build. +In this case, because the files in the wheel bundle +are pre-built, the user installing doesn't have to +worry about malicious code injections when it is installed. +``` + +The filename of a wheel contains important metadata about your package. + +Example: **stravalib-1.1.0.post2-py3-none-any.whl** + +- name: stravalib +- version: 1.1.0 +- build-number: 2 (post2) [(read more about post here)](https://peps.python.org/pep-0440/#post-release-separators) +- py3: supports Python 3.x +- none: is not operating system specific (runs on Windows, macOS, Linux) +- any: runs on any computer processor / architecture + +What a wheel file looks like when unpacked (unzipped): + +``` +stravalib-1.1.0.post2-py3-none-any.whl file contents: + +├─ 📂 stravalib +│ ├─ tests +│ │ ├─ functional +│ │ │ ├─ __init__.py +│ │ │ ├─ test_client.py +│ │ ├─ unit +│ │ │ ├─ __init__.py +│ │ │ ├─ test_attributes.py +│ │ ├─ __init__.py +│ │ ├─ auth_responder.py +│ │ └─ test.ini-example +│ ├─ util +│ │ ├─ __init__.py +│ │ └─ limiter.py +│ ├─ __init__.py +│ ├─ _version.py +│ ├─ _version_generated.py +│ ├─ attributes.py +│ ├─ client.py +└─ stravalib-1.1.0.post2.dist-info # Package metadata are stored here + ├─ LICENSE.txt + ├─ METADATA + ├─ RECORD + ├─ WHEEL + └─ top_level.txt + +``` + +```{tip} +[Read more about the wheel format here](https://pythonwheels.com/) +``` diff --git a/package-structure-code/python-package-structure.md b/package-structure-code/python-package-structure.md new file mode 100644 index 00000000..cce9e8fe --- /dev/null +++ b/package-structure-code/python-package-structure.md @@
-0,0 +1,252 @@ +# Python Package Structure for Scientific Python Projects + +There are two different layouts that you will commonly see +within the Python packaging ecosystem: +[src and flat layouts.](https://packaging.python.org/en/latest/discussions/src-layout-vs-flat-layout/) +Both layouts have advantages for different groups of maintainers. + +We strongly suggest, but do not require, that you use the **src/** layout (discussed below) +for creating your Python package. This layout is also recommended in the +[PyPA packaging guide](https://packaging.python.org/en/latest/tutorials/packaging-projects/). + +```{admonition} pyOpenSci will never require a specific package structure for peer review +:class: important + +We understand that it would be a tremendous effort for existing +maintainers to move to a new layout. + +The overview on this page presents recommendations that we think are best for +someone getting started with Python packaging or someone whose package +has a simple build and might be open to moving to a more fail-proof approach. +``` + +An example of the **src/package** layout structure can be seen below. + +``` +myPackageRepoName +├── CHANGELOG.md ┐ +├── CODE_OF_CONDUCT.md │ +├── CONTRIBUTING.md │ +├── docs │ Package documentation +│ └── index.md +│ └── ... │ +├── LICENSE │ +├── README.md ┘ +├── pyproject.toml ┐ +├── src │ +│ └── myPackage │ Package source code, metadata, +│ ├── __init__.py │ and build instructions +│ ├── moduleA.py │ +│ └── moduleB.py ┘ +└── tests ┐ + └── ... ┘ Package tests +``` + +Note the location of the following directories in the example above: + +- **docs/:** discussed in our docs chapter, this directory contains your user-facing documentation website. In a **src/** layout docs/ are normally included at the same directory level as the **src/** folder. +- **tests/:** this directory contains the tests for your project code.
In a **src/** layout tests are normally included at the same directory level as the **src/** folder. +- **src/package/**: this is the directory that contains the code for your Python project. "Package" is normally your project's name. + +Also in the above example, notice that all of the core documentation files that +pyOpenSci requires live in the root of your project directory. These files +include: + +- CHANGELOG.md +- CODE_OF_CONDUCT.md +- CONTRIBUTING.md +- LICENSE.txt +- README.md + +```{button-link} https://www.pyopensci.org/python-package-guide/documentation +:color: primary +:class: sd-rounded-pill + +Click here to read about our packaging documentation requirements. +``` + +While we recommend the **src/** layout, we also review the **flat** layout here. Both are used in the Python ecosystem. + +```{admonition} Example scientific packages that use **src/package** layout + +* [Sourmash](https://github.com/sourmash-bio/sourmash) +* [bokeh](https://github.com/bokeh/bokeh) +* [openscm](https://github.com/openscm/openscm-runner) +* [awkward](https://github.com/scikit-hep/awkward) +* [poliastro](https://github.com/poliastro/poliastro/) + +``` + +### The src/ layout and testing + +The benefit of using the **src/package** layout, particularly if you +are creating a new package, is that it ensures tests are run against the +installed version of your package rather than the files in your package +working directory. If you run your tests on your files rather than the +installed version, you may be missing issues that users encounter when +your package is installed. + +If `tests/` are outside of the **src/package** directory, they aren't included in the package wheel. This makes your package size slightly smaller, which then places a smaller storage burden on PyPI, which has over 400,000 packages to support.
+ +- [Read more about reasons to use the **src/package** layout](https://hynek.me/articles/testing-packaging/) + +```{admonition} How Python discovers and prioritizes importing modules + +By default, Python adds a module in your current working directory to the front of the Python module search path. + +This means that if you run your tests in your package's working directory, using a flat layout, `/package/module.py`, Python will discover the `package-name/module.py` file before it discovers the installed package. + +However, if your package lives in a src/ directory structure **src/package**, then it won't be, by default, added to the Python path. This means that when you import your package, Python will be forced to search the active environment (which has your package installed). + +Note: Python versions 3.11 and above have a path setting that can be adjusted to ensure the priority is to use installed packages first (e.g. `PYTHONSAFEPATH`). +``` + +#### Sometimes tests are needed in a distribution + +We do not recommend including tests as part of your package wheel by default. However, not including tests in your package distribution will make it harder for people other than yourself to test whether your package is functioning correctly on their system. If you have a small test suite (Python files + data), and think your users may want to run tests locally on their systems, you can include tests by moving the `tests/` directory into the **src/package** directory (see example below). + +```bash +src/ + package-name/ + tests/ +docs/ +``` + +Including the **tests/** directory in your **src/package** directory ensures that tests will be included in your package's wheel. + +Be sure to read the [pytest documentation for more about including tests in your package distribution](https://docs.pytest.org/en/7.2.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules).
+ +```{admonition} Challenges with including tests and data in a package wheel +:class: tip + +Tests, especially when accompanied by test data, can create a few small challenges including: + +- Tests take up space in your distribution, which will build up over time as storage space on PyPI. +- Large file sizes can also slow down package install. + +However, in some cases, particularly in the scientific Python ecosystem, you may need to include tests. +``` + +### **Don't include test suite datasets in your package** + +If you do include your tests in your package distribution, we strongly +discourage you from including data in your test suite directory. Rather, +host your test data in a repository such as Figshare or Zenodo. Use a +tool such as [Pooch](https://www.fatiando.org/pooch/latest/) to access +the data when you (or a user) run tests. + +Check out the testing section of our guide for more information about tests. + +- The **src/package** layout is semantically more clear. Code is always found in the + **src/package** directory, `tests/` and `docs/` are in the root directory. + +```{important} +If your package tests require data, we suggest that you do NOT include that +data within your package structure. We will discuss this in more detail in a +tutorial. Including data in your package structure increases the size of your +distribution files. This places a maintenance toll on repositories like PyPI and +Anaconda Cloud that have to deal with thousands of package uploads. +``` + +## About the flat Python package layout + +Currently most scientific packages use the **flat-layout** given: + +- It's the most commonly found layout within the scientific Python ecosystem and + people tend to look to other packages / maintainers that they respect for examples + of how to build Python packages. +- Many Python tools depend upon tools in other languages and / or complex builds + with compilation steps. Many developers thus appreciate / are used to features + of the flat layout.
+ +While we present this layout here in our guide, we suggest that those just +getting started with Python packaging start with the src/package layout +discussed above. Numerous packages in the ecosystem [have had to move to a +src/ layout](https://github.com/scikit-build/cmake-python-distributions/pull/145). + +```{admonition} Why most scientific Python packages do not use source +:class: tip + +In most cases the advantages of using the **src/package** layout for +larger scientific packages that already use a flat approach are not worth it. +Moving from a flat layout to a **src/package** layout would come at a significant cost to +maintainers. + +However, the advantages of using the **src/package** layout for a beginner are significant. +As such, we recommend that if you are getting started with creating a package, +you consider using a **src/package** layout. +``` + +## What does the flat layout structure look like? + +The flat layout's primary characteristics are: + +- The source code for your package lives in a directory with your package's + name in the root of your directory +- Often the `tests/` directory also lives within that same `package-name` directory. + +Below you can see the recommended structure of a scientific Python package +using the flat layout. + +```bash +myPackage/ +├── CHANGELOG.md ┐ +├── CODE_OF_CONDUCT.md │ +├── CONTRIBUTING.md │ +├── docs/ │ Package documentation +│ └── ... │ +├── LICENSE │ +├── README.md ┘ +├── pyproject.toml ] Package metadata and build configuration +| myPackage/ ┐ +│ ├── __init__.py │ Package source code +│ ├── moduleA.py │ +│ └── moduleB.py ┘ + tests/ ┐ + └── test-file1.py | Package tests + └── .... ┘ +``` + +### Benefits of using the flat layout in your Python package + +There are some benefits to the scientific community in using the flat layout.
+ +- This structure has historically been used across the ecosystem and packages + using it are unlikely to change. +- You can import the package directly from the root directory. For + some this is ingrained in their respective workflows. However, for a beginner + the danger of doing this is that you are not developing and testing against the + installed version of your package. Rather, you are working directly with the + flat files. + +```{admonition} Core scientific Python packages that use the flat layout +:class: tip + +* [numpy](https://github.com/numpy/numpy) +* [scipy](https://github.com/scipy/scipy) +* [pandas](https://github.com/pandas-dev/pandas) +* [xarray](https://github.com/pydata/xarray) +* [Jupyter-core](https://github.com/jupyter/jupyter_core) +* [Jupyter notebook](https://github.com/jupyter/notebook) +* [scikit-learn](https://github.com/scikit-learn/scikit-learn) + +It would be a significant maintenance cost and burden to move all of these +packages to a different layout. The potential benefits of the source layout +for these tools are not worth the maintenance investment. +``` + + + + diff --git a/pyproject.toml b/pyproject.toml index 7b0de45f..29048ea5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ # https://github.com/codespell-project/codespell#usage [tool.codespell] ignore-words = "codespell-ignore.txt" -skip = "./.git,./.nox,./_static,./_build,codespell-ignore.txt" +skip = "./.git,./.nox,./_static,./_build,codespell-ignore.txt,*.svg"