diff --git a/conf.py b/conf.py index a31db1c0..34e39a13 100644 --- a/conf.py +++ b/conf.py @@ -74,13 +74,15 @@ }, ], "logo": { - "text": "Python Package Guide", + "text": "Python Packaging", "image_dark": "logo-dark-mode.png", "image_light": "logo-light-mode.png", "alt_text": "pyOpenSci Python Package Guide. The pyOpenSci logo is a purple flower with pyOpenSci under it. The o in open sci is the center of the flower", }, - "header_links_before_dropdown": 3, + "header_links_before_dropdown": 4, "use_edit_page_button": True, + "show_nav_level": 2, + "navigation_depth": 3, "show_toc_level": 1, # "navbar_align": "left", # [left, content, right] For testing that the navbar items align properly "github_url": "https://github.com/pyopensci/python-package-guide", diff --git a/index.md b/index.md index 164a3bec..a6fe0599 100644 --- a/index.md +++ b/index.md @@ -1,4 +1,92 @@ -# pyOpenSci Python Open Source Package Development Guide +# pyOpenSci Python Package Guide + +Demystifying the Python packaging ecosystem with recommendations for +scientists to follow. + +::::{grid} 2 +:reverse: + +:::{grid-item} +:columns: 4 +:class: sd-m-auto + +::: + +:::{grid-item} +:columns: 8 +:class: sd-fs-3 + +```{button-link} https://www.pyopensci.org/about-peer-review/ +:color: primary +:class: sd-rounded-pill float-left +Learn about our open peer review process +``` + +```{only} html +![GitHub release (latest by date)](https://img.shields.io/github/v/release/pyopensci/python-package-guide?color=purple&display_name=tag&style=plastic) +[![](https://img.shields.io/github/stars/pyopensci/python-package-guide?style=social)](https://github.com/pyopensci/contributing-guide) +[![DOI](https://zenodo.org/badge/556814582.svg)](https://zenodo.org/badge/latestdoi/556814582) +``` + +::: +:::: + + + +::::{grid} 1 1 2 2 +:class-container: text-center +:gutter: 3 + +:::{grid-item-card} +:link: tutorials/intro +:link-type: doc + +✨ Just getting started? 
✨ +^^^ + +If you are new to packaging or just want an opinionated, end-to-end +way to create a package, check out our tutorials. +::: + +:::{grid-item-card} +:link: package-structure-code/intro +:link-type: doc + +✨ Want to better understand the ecosystem? ✨ +^^^ +If you are looking to better understand the tools and options in the +diverse Python packaging ecosystem, then this section is for you. Here +you will learn about the most commonly used tools in the ecosystem and +also tools and processes that our community recommends. +::: + +:::{grid-item-card} +:link: CONTRIBUTING +:link-type: doc + +✨ Want to contribute? ✨ +^^^ +We welcome contributions to this guide. Learn more about how you can +contribute. +::: +:::: + +## About + +This guide is designed to guide you through your Python packaging experience. + +It does several things: + +1. it provides an overview of the various elements of a python package +2. it provides an overview of tools to create python packages and recommends best practices +3. it provides end-to-end tutorials to get you on your way creating a package + +```{toctree} +:hidden: +:caption: Tutorials + +Tutorials +``` ```{toctree} :hidden: @@ -23,14 +111,6 @@ Packaging CI & Tests ``` - - -## Welcome, Python open source enthusiast! - -Here you will find guidelines for what we look for in your scientific -Python package when reviewing. You will also find best practice recommendations and curated lists of community resources surrounding packaging and package documentation. - ### pyOpenSci's packaging goals Our goal is to help the @@ -38,7 +118,7 @@ community make decisions around how to create scientific Python packages. We are ### How this guide is created -This guide is created by pyOpenSci through an extensive review process. Each page in the guide has been reviewed by experts in the broader Python packaging landscape including people from : +This guide is created by pyOpenSci through an extensive review process. 
Each page in the guide has been reviewed by experts in the broader Python packaging landscape including people from: - conda & conda-forge - the python packaging authority diff --git a/tutorials/1-create-environment.md b/tutorials/1-create-environment.md new file mode 100644 index 00000000..f693c8ba --- /dev/null +++ b/tutorials/1-create-environment.md @@ -0,0 +1,125 @@ +# Create a Python development environment + +To get started working on your package you will want to create a +shiny new Python environment. While this is not required, we strongly +encourage you to create an environment that is dedicated to your Python +package. + +Below you will learn how to create an environment using both: + +- venv - which is the environment manager that comes with Python +- conda - an environment manager that is tailored to the scientific ecosystem. + +There is no right or wrong environment option. Choose the option that you prefer. + +:::{tip} +If you are a scientist and you work with spatial data or are creating a package that is not pure Python, you may prefer conda. But in general pick +the environment tool that works best for you. +::: + +## Create a package development environment Using venv + +venv comes with any Python installation. Thus, you may find it easy to +use it for your package development needs. Instructions on how to create a +new development environment using venv are below. + +1. **Open Terminal or Command Prompt:** + + - Open your terminal or command prompt. + +2. **Navigate to the Project Directory:** + + ```bash + cd path/to/your/project-directory + ``` + +3. **Create a Virtual Environment:** + + ```bash + python -m venv pyos-dev + ``` + +4. **Activate the Virtual Environment:** + + - **On Windows:** + + ```bash + pyos-dev\Scripts\activate + ``` + + - **On macOS/Linux:** + + ```bash + source pyos-dev/bin/activate + ``` + +5. **Install/Manage Dependencies:** + + ```bash + pip install package_name + ``` + +6. **Work on Your Project:** + +7. 
**Deactivate the Virtual Environment:** + ```bash + deactivate + ``` + +### Additional Notes: + +- Remember to reactivate the environment each time you want to work on your project. +- To delete the virtual environment, delete the `pyos-dev` folder. + +## Creating a PyOS-Dev Environment Using Conda + +Some scientists prefer to use a conda environment for their package development. If that is your preference, follow the steps below. + +1. **Create an Environment File (`env.yml`):** + + - Use a text editor to create a file named `env.yml` and specify the required packages in the YAML format. For instance: + + ```yaml + name: pyos-dev + channels: + - defaults + dependencies: + - python=3.8 + - package_name1 + - package_name2 + # Add other necessary packages + ``` + +2. **Create the Conda Environment from the Environment File:** + + - Open your terminal or command prompt. + + ```bash + conda env create -f env.yml + ``` + + This command will read the `env.yml` file and create a Conda environment named `pyos-dev` with the specified packages. + +3. **Activate the Conda Environment:** + + - Once the environment is created, activate it. + - **On Windows:** + + ```bash + conda activate pyos-dev + ``` + + - **On macOS/Linux:** + + ```bash + source activate pyos-dev + ``` + +4. **Work on Your Project:** + - You're now working in the `pyos-dev` Conda environment. + +### Additional Notes: + +- Remember to activate the environment each time you want to work on your project. +- To deactivate the environment, use `conda deactivate`. +- To delete the environment, you can use `conda env remove -n pyos-dev`. 
diff --git a/tutorials/2-installable-code.md b/tutorials/2-installable-code.md new file mode 100644 index 00000000..499f1d78 --- /dev/null +++ b/tutorials/2-installable-code.md @@ -0,0 +1,374 @@ +# Make your Python code installable + + + +:::{admonition} Learning Objectives + +In this lesson you will learn: + +- How to make your code installable into a Python environment +- How to create a basic `pyproject.toml` file to declare dependencies and metadata +- How to declare a build backend which will be used to build and install your package (learn more about what build back ends are here - link to guide) +- How to install your package in editable mode for interactive development + ::: + +## Make your package installable + +It’s time to create your Python package. To make your code installable you need: + +- A `pyproject.toml` file +- An (optional but recommended) `__init__.py` file in your code directory +- a specific directory structure +- some code. + +The directory structure you’ll create in this first section looks like this: + +```toml + +pyospackage/ + └─ pyproject.toml + └─ src/ # the src directory ensures your tests always run on the installed + └── pyospackage/ # package directory where code lives, use the package name + ├── __init__.py + ├── add_numbers.py + └──# Add any other .py modules that you want here +``` + +If you already know what all of the elements of this package structure are, you can skip to the next lesson. Otherwise keep reading to learn about each element of the above structure. + +**Maybe have steps here and then add the explanation below??** + +Notice a few things about the above layout + +1. Your package code lives within a `src/packagename` directory. 
While it’s fine if you wish to use a [flat layout](https://www.pyopensci.org/python-package-guide/package-structure-code/python-package-structure.html#about-the-flat-python-package-layout) containing no `src/` directory, we suggest a src directory as it will make it easier for you to ensure you are always running tests on the installed version of your code. [More here on that](https://www.pyopensci.org/python-package-guide/package-structure-code/python-package-structure.html#the-src-layout-and-testing) +2. Within the `src/` directory you have a package directory called pyospackage/. Use the name of your package for that directory name. + 1. You also have an `__init__.py` file and all of your python modules in your package directory. +3. The pyproject.toml file lives at the root directory of your package. + +## Init.py and pyproject.toml files + +The `__init__.py` and pyproject.toml files in the above layout are important to understand. + +### What is an init.py file? + +The `__init__.py` file tells python that the directory it’s in should be treated as a Python package. The `__init__.py` file also + +- Allows you to organize multiple modules within the package. +- Allows you to create shortcuts for importing specific functions and classes into your code (more on that later!) +- Allows you to create a version object for people to call `__version__` + +:::{admonition} The `__init__.py` file +:class: tip + +Since Python 3.3 you can technically install a package without an `__init__.py` file. However, we suggest that you include it in your package structure as it allows you to make some important customizations to your package’s user experience. +::: + +### What is a pyproject.toml file? + +The **pyproject.toml** file is: + +- where you store your project’s metadata (including its name, authors, license, etc) +- where you store dependencies (the packages that it depends on) +- used to specify and configure what build back end you want to use to create your package. 
+ +A **pyproject.toml**: This file is critical for both installing your package and publishing to PyPI. This is where you will declare your project build system tools, dependencies and metadata. More on that later. + +After the `__init__.py` and pyproject.toml files have been added, your package can be built and distributed as an installable Python package using tools like pip. + +Note that the `pyproject.toml` file needs to have a few basic things for this to work, including: + +- the build backend that you want to use, +- the project name, and a few other metadata elements. + +Tip: **If you try to pip install a package with no `pyproject.toml` you will get the following error:** + +```bash +GitHub/pyospackage/testme +➜ pip install . +ERROR: Directory '.' is not installable. Neither 'setup.py' nor 'pyproject.toml' found. +``` + +**Tip:** The `pyproject.toml` file replaces some of the functionality of both the setup.py file and setup.cfg files. +{: .notice .notice--tip} + +## Try it yourself - Create your package structure! + +Let’s get started. Create a directory structure similar to the structure below. If you don’t wish to make each of the elements below, you can always [fork and clone and customize the pyOpenSci example package, here](https://github.com/pyOpenSci/pyosPackage) + +## Step 1: Set Up the Package Directory Structure + +Create a new directory for your package. Choose a name for your package, preferably in lowercase and without spaces (e.g., "pyospackage"). + +Inside the package directory, + +- Create a `src/` directory +- Within the `src/` directory, create a directory that is named after your package. This subdirectory will contain your package’s code. +- It is ok if the main directory of your package and the directory in `src/` have the same name + +Next create two files: + +- Inside the package directory, create a new file named `__init__.py`. This file ensures Python sees this directory as a package. 
You will use this file to customize how parts of your package are imported and to declare your package’s version in a future lesson. +- At the root of your directory, create a file called `pyproject.toml` + +Your final package directory structure should look like this: + +``` +pyospackage/ + └─ pyproject.toml + └─ src/ + └── pyospackage/ + ├── __init__.py +``` + +## Step 2: Add Code to Your Package + +Within the `pyospackage` subdirectory, add 1 or more Python modules +(.py files) containing the code that you want your package to access and run. +If you don't have code already and are just learning how to create a python +package, then create an empty `add_numbers.py` file. + +**Tip:** When you see the word module, we are referring to a .py file containing Python code. +{: .notice .notice--info } + +``` +pyospackage/ + └─ pyproject.toml + └─ src/ + └── pyospackage/ + ├── __init__.py + ├── add_numbers.py + +``` + +## Add some code to your add_numbers module + +If you are following along and making a python package from scratch then you can add the code below to your `add_numbers.py` module. The function below adds two integers together and returns the result. Notice that the code below has a few features that we will review in future tutorials: + +1. It has a [numpy-style docstring](https://www.pyopensci.org/python-package-guide/documentation/write-user-documentation/document-your-code-api-docstrings.html#three-python-docstring-formats-and-why-we-like-numpy-style) +2. It uses [typing](https://www.pyopensci.org/python-package-guide/documentation/write-user-documentation/document-your-code-api-docstrings.html#adding-type-hints-to-your-docstrings) + +If you aren’t familiar with docstrings or typing yet, that is ok. We will get +to it later in our tutorial series. Or you can review the pyOpenSci [packaging guide](https://www.pyopensci.org/python-package-guide/documentation/write-user-documentation/document-your-code-api-docstrings.html) +for an overview. 
+ +```python +def add_num(a: int, b: int) -> int: + """ + Add two numbers. + + Parameters + ---------- + a : int + The first number to be added. + b : int + The second number to be added. + + Returns + ------- + int + The sum of the two input numbers (a + b). + + Examples + -------- + >>> add_num(3, 5) + 8 + >>> add_num(-2, 7) + 5 + """ + return a + b +``` + +## Add metadata to your `pyproject.toml` file + +Next, you will add some information to your `pyproject.toml` file. You are +welcome to copy the file we have in our example repo here. + + + +### A brief overview of the TOML file + +The TOML format consists of tables and variables. Tables are sections of information denoted by square brackets: + +`[this-is-a-table]`. + +Tables can contain variables within them defined by a variable name and +an `=` sign. For +instance, a `build-system` table most often holds 2 variables: + +1. `requires = `, which tells a build tool what tools it needs to install prior to building your package. In this case it is + [hatchling](https://pypi.org/project/hatchling/) +2. `build-backend` is used to define the specific build-backend name (in this example we are using `hatchling.build`). + +TOML organizes data structures, defining relationships within a configuration +file. You will learn more about the pyproject.toml format in the +[next lesson when you add additional metadata / information to this file.](/python-package-tutorial/pyproject-toml.html) + +```toml +# An example of the build-system table which contains two variables - requires and build-backend +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" +``` + +- Open up your `pyproject.toml` file in your favorite text editor. +- Add the metadata below to your `pyproject.toml` + +```toml +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "pyospackage_gh_user_name" # rename this if you plan to publish to test pypi +# Here you add the package version manually. 
You will learn how to setup # dynamic versioning in a followup tutorial. +version="1.1" + +``` + +Note that above you manually add your package's version number to the +`pyproject.toml` file. You will learn how to automate defining a package +version using git tags in the version and release your package lesson. + +:::{admonition} The bare minimum needed in a pyproject.toml file +:class: tip + +The core basic information that you need in a `pyproject.toml` file in order to publish on PyPI is the name of your package and the version. However, we suggest that you flesh out your metadata early on in the `pyproject.toml` file. Once you have your project metadata in the pyproject.toml file, you will +rarely update it. In the next lesson you’ll add more metadata and structure to this file. +::: + +## Install your package locally + +At this point you should have: + +1. A project directory structure with a `pyproject.toml` file at the root +2. A package directory containing an empty `__init__.py` file and +3. At least one Python module (e.g. `add_numbers.py`) + +You are now ready to install (and build) your Python package!. + +Let’s try it out. + +- First `cd` into your package directory +- Activate the Python environment that you wish to use. If you need help with working with virtual environments [check out this lesson](1-create-environment). +- Next, open bash and + +```bash +# Activate your environment using conda or venv +# Below we use conda but you can do the same thing with venv! +> conda activate pyosdev +(pyosdev) +>> conda info + active environment : pyosdev + active env location : /Users/your-path/mambaforge/envs/pyosdev +# Install the package +>> python -m pip install -e . + +Obtaining file:///Users/leahawasser/Documents/GitHub/pyos/pyosPackage + Installing build dependencies ... done + Checking if build backend supports build_editable ... done + Getting requirements to build editable ... 
done + +# Check to see if the package is installed +> conda list +# use pip list instead of conda list here if you are working in a venv environment rather than a conda environment +``` + +`python -m pip install -e .` installs your package into the currently active +Python environment in **editable mode**. Installing your package in +editable mode allows you to work on your code and then test the updates +interactively. If you wish to install the package regularly (not in editable +mode) you can use: + +- `python -m pip install .` + +:::{admonition} Why use `python -m`? +:class: tip +Using `python -m` ensures that you are calling the version of pip associated with your currently active environment. +::: + +After installing your package, type “python” at the command prompt to start +a Python session in your active python environment. + +You can now import your package and access the `add_num` function. + +```bash +➜ python +Python 3.11.4 | packaged by conda-forge | (main, Jun 10 2023, 18:08:41) [Clang 15.0.7 ] on darwin +Type "help", "copyright", "credits" or "license" for more information. +>>> from pyospackage.add_numbers import add_num +>>> add_num(1, 2) +3 +``` + +## Customize access to Python functions using the `__init__.py` file + +> TODO: note that they may or may not understand modules etc... I have some text on this but make sure to link to that in the intro tutorials on the basics of packaging. Could this also introduce problematic behavior if users are learning and just add functions to the init file...it could... + +Let's make one more tweak to the code. + +If `add_num` is a function that you think users will use often, you may want to add it to your `__init__.py` file to allow them to import the function directly from the package rather than from the module. + +### Add functions to your `__init__.py` file + +To make a function or class available at the package level to a user, you can add it to the `__init__.py` file. + +- Open the `__init__.py` file. 
+- At the top of the file add the import below. + +```python +from pyospackage.add_numbers import add_num +``` + +Save the file. + +Now, open up a NEW python terminal or restart your Python kernel. + +It's important that you restart your Python kernel if you wish to access the changes to your code that you just made. +{: .notice .notice--important } + +```python +> python +Python 3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:41:52) [Clang 15.0.7 ] on darwin +Type "help", "copyright", "credits" or "license" for more information. +>>> from pyospackage import add_num +>>> add_num(1,2) +3 +``` + +The decision to add specific functions, methods or classes to your +`__init__.py` file is up to you. However, be sure that you do this thoughtfully +considering what functionality in your package you want to "elevate" to the top +level vs. what makes the most sense to keep in individual modules. + +> TODO: Guidelines for when and how to add methods and such to the `__init__.py` file here. + +> `__all__ = ['add_num']` + +### Congratulations! You created (the beginning of) your first Python package + +You did it! You have now created a Python package that you can install into any Python environment. While there is still more to do, you have completed the first major step. + +In the next lesson you will: + +1. Flesh out metadata for your package within your `pyproject.toml` file. +2. Learn how to build your package distribution files (**sdist** and **wheel**) and +3. Learn how to publish your package on **testpypi**. + +If you have a package that is ready for the mainstream user then +you can also publish your package on PyPI. + + diff --git a/tutorials/intro.md b/tutorials/intro.md new file mode 100644 index 00000000..af3a41c4 --- /dev/null +++ b/tutorials/intro.md @@ -0,0 +1,109 @@ +# Python package tutorial + +TODO: glob misses the landing page in the toctree? + +```{toctree} +:hidden: +:caption: Tutorials +:glob: +What is a package? 
+* + +``` + +:::{admonition} Learning Objectives + +## Learning objectives + +After reading this lesson you will: + +- Understand what a Python package is +- Be able to list the 5 core components of a Python package +- You will be able to explain the difference between generalizable code and code that supports a specific scientific application + +::: + +## What is a Python package? + +A Python package is a collection of related modules / code containing functions, classes and methods that are organized together in a directory. Packages allow you to group and structure your Python code, making it easier to manage and reuse code across different projects. + +Ideally the code in a published Python package is generalizable. This means it can be applied in different settings. + +An example of a package that has a generalized scope is matplotlib. It does one (big important) thing really well: + +It creates visual plots of data. + +Matplotlib is used by thousands of users for a host of different plotting applications. While few scientific packages will have the same broad application as tools like matplotlib or numpy, the +idea of code being used for something more than a single workflow still applies. + +
+ ADD ALT +
Caption here
+
+ +A Python package can be installed into a Python environment. This allows you to access it from any code run with that specific Python environment activated. + +## Why create a package? + +There are numerous reasons why you might create a Python package: + +- **Use your code across multiple projects:** At its most basic level, creating a package allows you to install your code into a Python environment. This allows you to then import functions and classes into any workflows both locally and in the cloud. +- **Share your code:** Sharing your code with others is often a common reason to create a package. If you publish a package on a public repository such as PyPI or conda, your package can immediately be installed on any machine using pip or conda with a single command. +- **Build community around your code:** Packages make it easier for multiple people to work on the same project (particularly when published on GitHub). A version control system such as git (the version control system used by GitHub) further makes it easier to track changes to the codebase over time. Tools such as issues and pull requests make it easier for outside users to contribute bug fixes and to establish review processes for accepting changes to the code base. +- **Organize your code:** Packages can be used to organize large code projects, dividing them into smaller, more manageable components. This structure can help with both maintaining the codebase and with making it easier to understand. + +## What to consider before you create a package + +Creating a python package that others use takes considerable time and effort. Before you begin, think about your goals including: + +- Who you think will use your package +- Whether you have time to add things such as documentation and tests +- How long you might be able to maintain it + +### The elements of a Python package + +A package in any language is more than just code. 
If your package is public facing, meaning people besides yourself will use it, you will want to think about the various elements of a package that make it a useful community resource. + +Most python packages live in an online version control platform such as GitHub or GitLab. These platforms support robust infrastructure including continuous integration and continuous deployment (CI/CD). + +
+ ADD ALT +
Caption here
+
+ +The core elements of a Python package include: + +- **Code:** Generalized code that performs operations that you may need to complete multiple times +- **Documentation:** documentation with tutorials / examples that help users get started using your tool + - Documentation also helps people to contribute to your package. +- **Tests:** that make sure your code works as it should and make it easier for you and others to contribute to, modify and update the code in the future +- **License:** An open source license …. - link to choose a license… +- **Infrastructure** that automates updates, publication workflows and runs test suites + +If you intend for others to use and contribute to your code, consider who will maintain it over time. You will want a **contributing / development** guide to help new potential contributors get started with contributing to your package. And a **code of conduct** to ensure community interactions remain healthy both for you and your contributors / maintainer team. + +_Link to our EiC checklist for core components that pyOpenSci looks for…_ + +## Yay, your package has users! Now what? + +As the community using your package grows, you may also find yourself managing users, contributors and others who want to interact with your package. It’s important to consider all this before you dive into development. Once you have a user base in the community, people will depend upon your code to work and will need direction regarding how to use it. + +## Development guidelines + +_TODO: I suspect nick murphy has something in this space that we can riff off of. Essentially I'm thinking here we have a SHORT section on development best practices and then link to another resource (be it on the pyos website or elsewhere). 
This would include things like:_ + +- _Defining the scope of your package and scope creep_ +- _Keeping functions focused on doing a single thing well._ +- _Broad generalizable get started guidelines and then we can link to more in depth resources in this space._ + +## Research compendia + +Goal: The goal of this section is really to distinguish a public facing package from a more specific set of scripts and even a package that is intended to support a single workflow / paper / etc. It likely needs a better heading as this term research compendia is a specific term that not everyone will know. + +On the other hand, if your code is intended to be used for a specific purpose such as supporting an open reproducible science workflow and subsequent publication, it may have a more specific application. In this case you might create a package for your own use or your lab’s use. However, this package may be less useful to others in the community other than when trying to reproduce your results. In this case having documentation and getting started guides is important. However, you may not need as much robust infrastructure around your package given you don’t intend to maintain it after publication or for others to maintain it over time. + +## What's next? + +In future lessons we will talk more about the infrastructure around a published python package that makes it both easier to maintain, easier for others to contribute to and easier for other scientists to use. However, first we want to get you to your initial goal of publishing a python package. + +In this next lesson you will learn how to create a basic installable Python package.