From eab61af92d83fcc4ef29d956b9d18b52cef5a1e0 Mon Sep 17 00:00:00 2001 From: David Shean Date: Mon, 24 Apr 2023 10:57:44 -0700 Subject: [PATCH] Initial commit --- .editorconfig | 35 + .github/workflows/publish.yml | 62 + .gitignore | 144 ++ CODE_OF_CONDUCT.md | 128 ++ CONTRIBUTING.md | 57 + LICENSE | 21 + README.md | 16 + book/_config.yml | 35 + book/_toc.yml | 36 + book/chapters/about.ipynb | 64 + book/chapters/conclusion.ipynb | 56 + book/chapters/data.ipynb | 42 + book/chapters/development.ipynb | 42 + book/chapters/evaluation.ipynb | 42 + book/chapters/example.ipynb | 1921 +++++++++++++++++++++++++++ book/chapters/methods.ipynb | 42 + book/chapters/motivation.ipynb | 42 + book/chapters/questions.ipynb | 42 + book/chapters/reproducibility.ipynb | 42 + book/chapters/todo.ipynb | 42 + book/chapters/training.ipynb | 42 + book/chapters/troubleshooting.ipynb | 66 + book/chapters/util.py | 6 + book/chapters/workflow.ipynb | 43 + book/img/GeoSMART_logo.svg | 19 + book/img/logo.png | Bin 0 -> 7957 bytes book/img/use_case_badge.svg | 33 + book/intro.md | 3 + book/reference/bibliography.md | 5 + book/reference/glossary.md | 65 + book/references.bib | 33 + environment.yml | 10 + 32 files changed, 3236 insertions(+) create mode 100644 .editorconfig create mode 100644 .github/workflows/publish.yml create mode 100644 .gitignore create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 book/_config.yml create mode 100644 book/_toc.yml create mode 100644 book/chapters/about.ipynb create mode 100644 book/chapters/conclusion.ipynb create mode 100644 book/chapters/data.ipynb create mode 100644 book/chapters/development.ipynb create mode 100644 book/chapters/evaluation.ipynb create mode 100644 book/chapters/example.ipynb create mode 100644 book/chapters/methods.ipynb create mode 100644 book/chapters/motivation.ipynb create mode 100644 book/chapters/questions.ipynb create mode 100644 
book/chapters/reproducibility.ipynb create mode 100644 book/chapters/todo.ipynb create mode 100644 book/chapters/training.ipynb create mode 100644 book/chapters/troubleshooting.ipynb create mode 100644 book/chapters/util.py create mode 100644 book/chapters/workflow.ipynb create mode 100644 book/img/GeoSMART_logo.svg create mode 100644 book/img/logo.png create mode 100644 book/img/use_case_badge.svg create mode 100644 book/intro.md create mode 100644 book/reference/bibliography.md create mode 100644 book/reference/glossary.md create mode 100644 book/references.bib create mode 100644 environment.yml diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..48b11d2 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,35 @@ +# https://EditorConfig.org + +root = true + +# Unix-style newlines with a newline ending every file +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 + +[*.ipynb] +# Content is json, but it seems to be minimally formatted +indent_size = unset + +[*.js] +indent_size = 2 + +[*.json] +indent_size = 2 + +[*.md] +indent_size = unset + +[*.py] +# yapf and black will use indents other than 4 spaces +indent_size = unset + +[*.rst] +indent_size = unset + +[{*.yml, *.yaml}] +indent_size = 2 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..8c1246e --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,62 @@ +name: Deploy JupyterBook to GitHub Pages + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + # Build JupyterBook Website + 
build: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Install Conda environment with Micromamba + uses: mamba-org/provision-with-micromamba@v15 + with: + cache-env: true + + - name: Build the Book + run: jb build book + + - name: Upload artifact + uses: actions/upload-pages-artifact@v1 + with: + path: book/_build/html + + + # Publish Website to GitHub Pages if built successfully + deploy: + needs: build + if: github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + steps: + - name: Setup Pages + uses: actions/configure-pages@v3 + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9d48f08 --- /dev/null +++ b/.gitignore @@ -0,0 +1,144 @@ +.DS_Store + +# Jupyter Book things +.bash_history +*_build +.jupyter-server-log.txt +.config/ +.jupyter/ +.local/ +.viminfo + +# cookiecutter webpage things +cookiecutter.json +/book/_build/html/assets + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..61c150f --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal 
appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +uwhackweek@gmail.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. 
No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..2281341 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,57 @@ +# Contributing + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +- Report bugs, request features or submit feedback as a [GitHub Issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/about-issues). +- Make fixes, add content or improvements using [GitHub Pull Requests](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) + +Ready to contribute? Here's a quick guide + + +## Contributing website changes + +To build our website, we need specific Python packages which are managed with the `conda` and `mamba` tools. 
If you do not already have those tools installed, we recommend using the [Mambaforge Installer](https://github.com/conda-forge/miniforge#mambaforge): + + +1. Fork this GeoSMART website repo on GitHub. + +1. Clone your fork locally: + + ```sh + git clone https://github.com/geo-smart/use_case_template.git + cd use_case_template + ``` + +1. Create a branch to add your changes: + + ```sh + git checkout -b name-of-your-bugfix-or-feature + ``` + +1. Create and activate the "geosmart" conda environment. + + __NOTE__: If you're running linux or Windows use `conda/conda-linux-64.lock.yml` + ```sh + mamba env create --name geosmart --file conda/conda-osx-64.lock.yml + mamba activate geosmart + ``` + __NOTE__: If you want to add packages or change the environment, + you must follow the procedure outlined in [./conda/README.md](./conda/README.md). + +1. Make your desired changes and build the book locally + + ```sh + jb build book + ``` + __NOTE__: to preview the changes open `book/_build/html/index.html` + +1. Push your branch to GitHub when you're ready: + + ```sh + git add . + git commit -m "Your detailed description of your changes." + git push origin name-of-your-bugfix-or-feature + ``` + +1. 
Open a pull request through the GitHub website: https://github.com/geo-smart/use_case_template diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..da17f1a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 uwhackweek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..b2bcac5 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# GeoSMART Use Case Jupyter Book + +[![Deploy](https://github.com/geo-smart/use_case_template/actions/workflows/publish.yml/badge.svg)](https://github.com/geo-smart/use_case_template/actions/workflows/publish.yml) +[![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://geo-smart.github.io/simple-template) +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/geo-smart/simple-template/HEAD?labpath=book%2Fchapters) +[![GeoSMART Use Case](./book/img/use_case_badge.svg)](https://geo-smart.github.io/usecases) + +This repository stores a skeleton of a GeoSMART use case book.
+ +1. Click "Use This Template" and name your repository + +2. In your repository edit book/_config.yml + +3. Under your repository Settings --> Pages --> Source = GitHub Actions + +4. Edit environment.yml, modify notebooks, and your JupyterBook will be published for you! diff --git a/book/_config.yml b/book/_config.yml new file mode 100644 index 0000000..b54cb97 --- /dev/null +++ b/book/_config.yml @@ -0,0 +1,35 @@ +# Book settings +# Learn more at https://jupyterbook.org/customize/config.html + +title: "Use Case Template" +author: eScience Institute, University of Washington +logo: img/GeoSMART_logo.svg +only_build_toc_files: true + +# Information about where the book exists on the web +repository: + url: https://github.com/geo-smart/simple-template # Online location of your book + path_to_book: book # Optional path to your book, relative to the repository root + branch: main # Which branch of the repository should be used when creating links (optional) + +# Force re-execution of notebooks on each build. +# See https://jupyterbook.org/content/execute.html +execute: + execute_notebooks: 'force' + +html: + use_edit_page_button: true + use_issues_button: true + use_repository_button: true + home_page_in_navbar: false + +# Configure your Binder links, such as the URL of the BinderHub. +launch_buttons: + notebook_interface: jupyterlab + binderhub_url: "https://mybinder.org" + +sphinx: + config: + # Required for PlotLy plots? 
+ html_js_files: + - https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js diff --git a/book/_toc.yml b/book/_toc.yml new file mode 100644 index 0000000..fc48976 --- /dev/null +++ b/book/_toc.yml @@ -0,0 +1,36 @@ +# Table of contents +# Learn more at https://jupyterbook.org/customize/toc.html +root: intro +format: jb-book +parts: +- caption: About + chapters: + - title: Geosmart Website + url: https://geo-smart.github.io/index.html + - title: Project Pythia Foundations + url: https://foundations.projectpythia.org/landing-page.html + - title: About Use Case Library + file: chapters/about + +- caption: Tutorial + chapters: + - file: chapters/motivation + - file: chapters/methods + - file: chapters/data + - file: chapters/development + - file: chapters/training + - file: chapters/evaluation + - file: chapters/workflow + - file: chapters/reproducibility + - file: chapters/conclusion + - file: chapters/todo + - file: chapters/questions + - file: chapters/troubleshooting + +- caption: Example Notebook + chapters: + - file: chapters/example + +- caption: References + chapters: + - file: reference/glossary diff --git a/book/chapters/about.ipynb b/book/chapters/about.ipynb new file mode 100644 index 0000000..4617889 --- /dev/null +++ b/book/chapters/about.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# About the GeoSMART Use Case Library\n", + "\n", + "### General Overview\n", + "\n", + "Explain this family of books, link to others, explain how to contribute and purpose. 
Books in the library can be identified by the badge:\n", + "\n", + "[![GeoSMART Use Case](../img/use_case_badge.svg)](https://geo-smart.github.io/usecases)\n", + "\n", + "### Contributing Content\n", + "\n", + "Tutorial content can be integrated into jupyterbooks in one of two ways:\n", + "* Do it yourself (use this template book and add your content)\n", + "* Provide us with your content (preferably in a github repo) and we will integrate it\n", + "\n", + "The goal is to provide executable code on some platform. The contributor can choose between:\n", + "* Binder\n", + "* Google Colab\n", + "* Free AWS (smaller cloud-based examples)\n", + "If none of the above options work for you, please contact us directly to discuss further.\n", + "\n", + "### Technical Details\n", + "\n", + "Creating a use case book can be done by navigating to the use case template repository and clicking the \"Use this template\" button.\n", + "\n", + "The `.github` folder already contains the github actions that will handle CI/CD deployment to github pages. There is no need to create a gh-pages branch; the first run of the github actions should handle that automatically." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/conclusion.ipynb b/book/chapters/conclusion.ipynb new file mode 100644 index 0000000..68ccb1f --- /dev/null +++ b/book/chapters/conclusion.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Discussion / Conclusion \n", + "\n", + "### Conclusion\n", + "\n", + "Is ML the right tool for the research?/How to identify the best ML tool for my problem? \n", + "Can a simple model be as good as a complex one? ML model discovery \t\t\t\n", + "Do I have enough confidence to extrapolate the results? Is the model transferable?\t\n", + "\n", + "### Discussion\n", + "\n", + "This chapter has two parts: lessons learnt and open questions. \n", + "\n", + "Lessons learnt should summarize the new stuff we learn from this use case of AI. What new contribution does AI give to solve this problem? Is it good enough to achieve your expected goal? What part of the work was unexpected before you dove in? Do you think the model can work in your production environment? etc.\n", + "\n", + "Open questions should focus on future possibilities: if your team wants to adopt this model, what else should you do to make it fully work? How should we better tackle the data bias problem? 
How should we address the generalization issue on spatial and temporal extent in practice?\n", + "\n", + "Please elaborate a little bit on these questions with your real thoughts, which will be very helpful for us to tell the final story to students. \n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/data.ipynb b/book/chapters/data.ipynb new file mode 100644 index 0000000..c30086e --- /dev/null +++ b/book/chapters/data.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Data Preparation\n", + "\n", + "Data description, including source, size, type, attributes, modality, etc. Data retrieval from community data centers, personal cloud storage, or published datasets. 
Feature extraction and engineering.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/development.ipynb b/book/chapters/development.ipynb new file mode 100644 index 0000000..b6afd01 --- /dev/null +++ b/book/chapters/development.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Model Development and Parameter Tuning\n", + "\n", + "Explain the effects of parameter tuning and how it was performed. If you used a tool to perform automatic tuning, explain that as well. 
If it is the case, explain model under or over fitting and the consequences.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/evaluation.ipynb b/book/chapters/evaluation.ipynb new file mode 100644 index 0000000..09280a1 --- /dev/null +++ b/book/chapters/evaluation.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Performance Evaluation\n", + "\n", + "Testing of the model on independent datasets.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/example.ipynb b/book/chapters/example.ipynb new file mode 100644 index 0000000..c0ee06e --- /dev/null +++ b/book/chapters/example.ipynb @@ -0,0 +1,1921 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + 
"source": [ + "# Sample Jupyter Notebook" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Demo of executability below.\n", + "Loading and plotting sample data from plotly express." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "customdata": [ + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.4 + ], + [ + 0.3 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.1 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.1 + ], + [ + 0.1 + ], + [ + 0.2 + ], + [ + 0.4 + ], + [ + 0.4 + ], + [ + 0.3 + ], + [ + 0.3 + ], + [ + 0.3 + ], + [ + 0.2 + ], + [ + 0.4 + ], + [ + 0.2 + ], + [ + 0.5 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.4 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.4 + ], + [ + 0.1 + ], + [ + 0.2 + ], + [ + 0.1 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.1 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.3 + ], + [ + 0.3 + ], + [ + 0.2 + ], + [ + 0.6 + ], + [ + 0.4 + ], + [ + 0.3 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.2 + ], + [ + 0.2 + ] + ], + "hovertemplate": "species=setosa
sepal_width=%{x}
sepal_length=%{y}
petal_length=%{marker.size}
petal_width=%{customdata[0]}", + "legendgroup": "setosa", + "marker": { + "color": "#636efa", + "size": [ + 1.4, + 1.4, + 1.3, + 1.5, + 1.4, + 1.7, + 1.4, + 1.5, + 1.4, + 1.5, + 1.5, + 1.6, + 1.4, + 1.1, + 1.2, + 1.5, + 1.3, + 1.4, + 1.7, + 1.5, + 1.7, + 1.5, + 1, + 1.7, + 1.9, + 1.6, + 1.6, + 1.5, + 1.4, + 1.6, + 1.6, + 1.5, + 1.5, + 1.4, + 1.5, + 1.2, + 1.3, + 1.5, + 1.3, + 1.5, + 1.3, + 1.3, + 1.3, + 1.6, + 1.9, + 1.4, + 1.6, + 1.4, + 1.5, + 1.4 + ], + "sizemode": "area", + "sizeref": 0.01725, + "symbol": "circle" + }, + "mode": "markers", + "name": "setosa", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 3.5, + 3, + 3.2, + 3.1, + 3.6, + 3.9, + 3.4, + 3.4, + 2.9, + 3.1, + 3.7, + 3.4, + 3, + 3, + 4, + 4.4, + 3.9, + 3.5, + 3.8, + 3.8, + 3.4, + 3.7, + 3.6, + 3.3, + 3.4, + 3, + 3.4, + 3.5, + 3.4, + 3.2, + 3.1, + 3.4, + 4.1, + 4.2, + 3.1, + 3.2, + 3.5, + 3.1, + 3, + 3.4, + 3.5, + 2.3, + 3.2, + 3.5, + 3.8, + 3, + 3.8, + 3.2, + 3.7, + 3.3 + ], + "xaxis": "x", + "y": [ + 5.1, + 4.9, + 4.7, + 4.6, + 5, + 5.4, + 4.6, + 5, + 4.4, + 4.9, + 5.4, + 4.8, + 4.8, + 4.3, + 5.8, + 5.7, + 5.4, + 5.1, + 5.7, + 5.1, + 5.4, + 5.1, + 4.6, + 5.1, + 4.8, + 5, + 5, + 5.2, + 5.2, + 4.7, + 4.8, + 5.4, + 5.2, + 5.5, + 4.9, + 5, + 5.5, + 4.9, + 4.4, + 5.1, + 5, + 4.5, + 4.4, + 5, + 5.1, + 4.8, + 5.1, + 4.6, + 5.3, + 5 + ], + "yaxis": "y" + }, + { + "customdata": [ + [ + 1.4 + ], + [ + 1.5 + ], + [ + 1.5 + ], + [ + 1.3 + ], + [ + 1.5 + ], + [ + 1.3 + ], + [ + 1.6 + ], + [ + 1 + ], + [ + 1.3 + ], + [ + 1.4 + ], + [ + 1 + ], + [ + 1.5 + ], + [ + 1 + ], + [ + 1.4 + ], + [ + 1.3 + ], + [ + 1.4 + ], + [ + 1.5 + ], + [ + 1 + ], + [ + 1.5 + ], + [ + 1.1 + ], + [ + 1.8 + ], + [ + 1.3 + ], + [ + 1.5 + ], + [ + 1.2 + ], + [ + 1.3 + ], + [ + 1.4 + ], + [ + 1.4 + ], + [ + 1.7 + ], + [ + 1.5 + ], + [ + 1 + ], + [ + 1.1 + ], + [ + 1 + ], + [ + 1.2 + ], + [ + 1.6 + ], + [ + 1.5 + ], + [ + 1.6 + ], + [ + 1.5 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.2 + ], + [ + 
1.4 + ], + [ + 1.2 + ], + [ + 1 + ], + [ + 1.3 + ], + [ + 1.2 + ], + [ + 1.3 + ], + [ + 1.3 + ], + [ + 1.1 + ], + [ + 1.3 + ] + ], + "hovertemplate": "species=versicolor
sepal_width=%{x}
sepal_length=%{y}
petal_length=%{marker.size}
petal_width=%{customdata[0]}", + "legendgroup": "versicolor", + "marker": { + "color": "#EF553B", + "size": [ + 4.7, + 4.5, + 4.9, + 4, + 4.6, + 4.5, + 4.7, + 3.3, + 4.6, + 3.9, + 3.5, + 4.2, + 4, + 4.7, + 3.6, + 4.4, + 4.5, + 4.1, + 4.5, + 3.9, + 4.8, + 4, + 4.9, + 4.7, + 4.3, + 4.4, + 4.8, + 5, + 4.5, + 3.5, + 3.8, + 3.7, + 3.9, + 5.1, + 4.5, + 4.5, + 4.7, + 4.4, + 4.1, + 4, + 4.4, + 4.6, + 4, + 3.3, + 4.2, + 4.2, + 4.2, + 4.3, + 3, + 4.1 + ], + "sizemode": "area", + "sizeref": 0.01725, + "symbol": "circle" + }, + "mode": "markers", + "name": "versicolor", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 3.2, + 3.2, + 3.1, + 2.3, + 2.8, + 2.8, + 3.3, + 2.4, + 2.9, + 2.7, + 2, + 3, + 2.2, + 2.9, + 2.9, + 3.1, + 3, + 2.7, + 2.2, + 2.5, + 3.2, + 2.8, + 2.5, + 2.8, + 2.9, + 3, + 2.8, + 3, + 2.9, + 2.6, + 2.4, + 2.4, + 2.7, + 2.7, + 3, + 3.4, + 3.1, + 2.3, + 3, + 2.5, + 2.6, + 3, + 2.6, + 2.3, + 2.7, + 3, + 2.9, + 2.9, + 2.5, + 2.8 + ], + "xaxis": "x", + "y": [ + 7, + 6.4, + 6.9, + 5.5, + 6.5, + 5.7, + 6.3, + 4.9, + 6.6, + 5.2, + 5, + 5.9, + 6, + 6.1, + 5.6, + 6.7, + 5.6, + 5.8, + 6.2, + 5.6, + 5.9, + 6.1, + 6.3, + 6.1, + 6.4, + 6.6, + 6.8, + 6.7, + 6, + 5.7, + 5.5, + 5.5, + 5.8, + 6, + 5.4, + 6, + 6.7, + 6.3, + 5.6, + 5.5, + 5.5, + 6.1, + 5.8, + 5, + 5.6, + 5.7, + 5.7, + 6.2, + 5.1, + 5.7 + ], + "yaxis": "y" + }, + { + "customdata": [ + [ + 2.5 + ], + [ + 1.9 + ], + [ + 2.1 + ], + [ + 1.8 + ], + [ + 2.2 + ], + [ + 2.1 + ], + [ + 1.7 + ], + [ + 1.8 + ], + [ + 1.8 + ], + [ + 2.5 + ], + [ + 2 + ], + [ + 1.9 + ], + [ + 2.1 + ], + [ + 2 + ], + [ + 2.4 + ], + [ + 2.3 + ], + [ + 1.8 + ], + [ + 2.2 + ], + [ + 2.3 + ], + [ + 1.5 + ], + [ + 2.3 + ], + [ + 2 + ], + [ + 2 + ], + [ + 1.8 + ], + [ + 2.1 + ], + [ + 1.8 + ], + [ + 1.8 + ], + [ + 1.8 + ], + [ + 2.1 + ], + [ + 1.6 + ], + [ + 1.9 + ], + [ + 2 + ], + [ + 2.2 + ], + [ + 1.5 + ], + [ + 1.4 + ], + [ + 2.3 + ], + [ + 2.4 + ], + [ + 1.8 + ], + [ + 1.8 + ], + [ + 2.1 + ], + [ + 2.4 + ], + [ + 2.3 + 
], + [ + 1.9 + ], + [ + 2.3 + ], + [ + 2.5 + ], + [ + 2.3 + ], + [ + 1.9 + ], + [ + 2 + ], + [ + 2.3 + ], + [ + 1.8 + ] + ], + "hovertemplate": "species=virginica
sepal_width=%{x}
sepal_length=%{y}
petal_length=%{marker.size}
petal_width=%{customdata[0]}", + "legendgroup": "virginica", + "marker": { + "color": "#00cc96", + "size": [ + 6, + 5.1, + 5.9, + 5.6, + 5.8, + 6.6, + 4.5, + 6.3, + 5.8, + 6.1, + 5.1, + 5.3, + 5.5, + 5, + 5.1, + 5.3, + 5.5, + 6.7, + 6.9, + 5, + 5.7, + 4.9, + 6.7, + 4.9, + 5.7, + 6, + 4.8, + 4.9, + 5.6, + 5.8, + 6.1, + 6.4, + 5.6, + 5.1, + 5.6, + 6.1, + 5.6, + 5.5, + 4.8, + 5.4, + 5.6, + 5.1, + 5.1, + 5.9, + 5.7, + 5.2, + 5, + 5.2, + 5.4, + 5.1 + ], + "sizemode": "area", + "sizeref": 0.01725, + "symbol": "circle" + }, + "mode": "markers", + "name": "virginica", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 3.3, + 2.7, + 3, + 2.9, + 3, + 3, + 2.5, + 2.9, + 2.5, + 3.6, + 3.2, + 2.7, + 3, + 2.5, + 2.8, + 3.2, + 3, + 3.8, + 2.6, + 2.2, + 3.2, + 2.8, + 2.8, + 2.7, + 3.3, + 3.2, + 2.8, + 3, + 2.8, + 3, + 2.8, + 3.8, + 2.8, + 2.8, + 2.6, + 3, + 3.4, + 3.1, + 3, + 3.1, + 3.1, + 3.1, + 2.7, + 3.2, + 3.3, + 3, + 2.5, + 3, + 3.4, + 3 + ], + "xaxis": "x", + "y": [ + 6.3, + 5.8, + 7.1, + 6.3, + 6.5, + 7.6, + 4.9, + 7.3, + 6.7, + 7.2, + 6.5, + 6.4, + 6.8, + 5.7, + 5.8, + 6.4, + 6.5, + 7.7, + 7.7, + 6, + 6.9, + 5.6, + 7.7, + 6.3, + 6.7, + 7.2, + 6.2, + 6.1, + 6.4, + 7.2, + 7.4, + 7.9, + 6.4, + 6.3, + 6.1, + 7.7, + 6.3, + 6.4, + 6, + 6.9, + 6.7, + 6.9, + 5.8, + 6.8, + 6.7, + 6.7, + 6.3, + 6.5, + 6.2, + 5.9 + ], + "yaxis": "y" + } + ], + "layout": { + "legend": { + "itemsizing": "constant", + "title": { + "text": "species" + }, + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": 
"barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 
0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": 
"scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" 
+ ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + 
"zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "sepal_width" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "sepal_length" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas\n", + "import plotly.express as px\n", + "df = px.data.iris()\n", + "fig = px.scatter(df, x=\"sepal_width\", y=\"sepal_length\", color=\"species\",\n", + " size='petal_length', hover_data=['petal_width'])\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15\n" + ] + } + ], + "source": [ + "import util\n", + "\n", + "x = 10\n", + "y = 20\n", + "print(util.dummy_method(x, y))" + ] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "geosmart", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "vscode": { + "interpreter": { + "hash": "a0d8bf954ea6db8eac7eea84f0a01b1ff10874f08007b8fbfb0c71c8847e6862" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/methods.ipynb b/book/chapters/methods.ipynb new file mode 100644 index 0000000..63df639 --- /dev/null +++ b/book/chapters/methods.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Machine Learning Methods and Tools\n", + "\n", + "Explain the method and why you think it's suitable for your use case. Explain the choice of tools/packages/data and the reason for use.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/motivation.ipynb b/book/chapters/motivation.ipynb new file mode 100644 index 0000000..8f0ef2a --- /dev/null +++ b/book/chapters/motivation.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Motivation (Science or Utility)\n", + "\n", + 
"Explain the science motivation, data challenge and any existing attempts. Explain why you need ML to solve the problem as opposed to a conventional approach.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/questions.ipynb b/book/chapters/questions.ipynb new file mode 100644 index 0000000..92c141b --- /dev/null +++ b/book/chapters/questions.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Open questions\n", + "\n", + "More future oriented tasks that require deeper and long term research, and will have big returns.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/reproducibility.ipynb b/book/chapters/reproducibility.ipynb new file mode 100644 index 0000000..609f7d8 --- /dev/null +++ 
b/book/chapters/reproducibility.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Reproducibility\n", + "\n", + "How to get credit for the work. Steps for getting a DOI. Discuss workflow adaptation to other datasets and science questions if suitable.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/todo.ipynb b/book/chapters/todo.ipynb new file mode 100644 index 0000000..eb1b021 --- /dev/null +++ b/book/chapters/todo.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Try something on your own\n", + "\n", + "Interesting and meaningful tasks that the tutorials didn’t do, but you could try and they should be achievable with reasonably small amount of time and effort\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": 
"c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/training.ipynb b/book/chapters/training.ipynb new file mode 100644 index 0000000..ba800a2 --- /dev/null +++ b/book/chapters/training.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Model Training\n", + "\n", + "Explain how you split the data into training, testing and validation sets. Explore feature importance. Save the model.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/chapters/troubleshooting.ipynb b/book/chapters/troubleshooting.ipynb new file mode 100644 index 0000000..c6b8b67 --- /dev/null +++ b/book/chapters/troubleshooting.ipynb @@ -0,0 +1,66 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Troubleshooting\n", + "\n", + "This page lists the issues we have encountered when creating or following the tutorials.\n", + "\n", + "If you have no issues with running the tutorials, you can skip this chapter. Platform-specific issues are likely to come up now and then, and we would love to collect that knowledge to help future students avoid wasting time on them. \n", + "\n", + "In this chapter, each section should address one technical issue/concern. 
Please list your running environment in as much detail as possible. The following is an example:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example Issue: Cannot run the `model.train` in Chapter 2. It emits the error: \"xxxxx\".\n", + "\n", + "#### Environment\n", + "Machine: Apple M1 laptop\n", + "Python: 3.10\n", + "Conda: 4.12\n", + "Scikit-learn: 1.0.2\n", + "...\n", + "\n", + "#### Code\n", + "Line 1xxx in Chapter 2 (link)\n", + "```\n", + "clf = RandomForestClassifier(max_depth=2, random_state=0)\n", + "clf.train(X, y)\n", + "```\n", + "\n", + "#### Error\n", + "raised Error xxxx\n", + "\n", + "#### Diagnosis\n", + "This might be caused by the incompatibility among xxxx\n", + "\n", + "#### Solution\n", + "Please remove xxx, install xxx, and do xxx to try again. " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/book/chapters/util.py b/book/chapters/util.py new file mode 100644 index 0000000..a57942f --- /dev/null +++ b/book/chapters/util.py @@ -0,0 +1,6 @@ +''' +This script includes all packages and functions required by book chapters +''' + +def dummy_method(a, b): + return (a + 5) % b \ No newline at end of file diff --git a/book/chapters/workflow.ipynb b/book/chapters/workflow.ipynb new file mode 100644 index 0000000..3a67d0b --- /dev/null +++ b/book/chapters/workflow.ipynb @@ -0,0 +1,43 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Workflow Management / Cloud Computing\n", + "\n", + "Explain cloud-based workflows and compute 
requirements.\n", + "If you seek to make the model run in production or achieve higher productivity and scalability, use GeoWeaver or other tools.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0 (tags/v3.9.0:9cf6752, Oct 5 2020, 15:23:07) [MSC v.1927 32 bit (Intel)]" + }, + "vscode": { + "interpreter": { + "hash": "c446eef832ec964573dc49f36fd16bdbed40cbfbefbf557bc2dc78d9e7968689" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/book/img/GeoSMART_logo.svg b/book/img/GeoSMART_logo.svg new file mode 100644 index 0000000..d77f37d --- /dev/null +++ b/book/img/GeoSMART_logo.svg @@ -0,0 +1,19 @@ + + + diff --git a/book/img/logo.png b/book/img/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..0dd3061f6b89a98bfb4e933f3e07cfe8f6f7ed07 GIT binary patch literal 7957 zcmV+wAL`(VP)3i1Gvw#zs>p#lbdtj`@847=WXX4h7bb& z2MM5VwgCV2_#cB(@jnKog1OlOGy`eU%3kUjHvS)8{owxJ9|<>^L4t8nMd_uy?CM%m zNmZH2YA&y?#2A?^wU#=Ihbxsr6%iT^Ar-CbE|)2EVc}`Z9W{D)qIdz|El+ z-juo6?r`k-ZqN5e_n*0R9uR`Ch%o^KcZLK42u(8-MNu>r8WcL~uBp?;PEL%~dwz>B zrl}jMnr<9}yuhE&PCs(y`1gnQolieaF*MC^G(}5klK>(JJaV~`l2S(xy?ykMk;D59 z4%P`f2IPF6R-TK_aYkDT6C>4t9+9j_SgLy^ytKV3c ze(7Spt)8K8!hD4ZqA5C2Z+K$;uwe>98N8%m|>^NrgF_UbPp!~|$W2gY>Tvirj!vqqOz z6#tRw0|40U4R@cN5ZI- zs>2`s7!ek3b2?RWxfFRMF_>RkICu4*3rg}iRwhM=c7U*mL_scB>0=YZL&IYtqYcps zX~`)HPL`UOLQxR7#fKCuudb+TXt-RIUs_XcSj z^?Po=OGKEV{U3||GnijoFmBm&o85MudY^y1qHozU!#rqH+sgZJTpAeC#RH$O3;~$(j`@w1R000gil^cx(RmKxpxfk+_ z&SvHDb_b*y%@W|{0uzKhp^C7G&_O9leUlRg8T2usq0wR88W}Gw+VaJYgQt#^mzQ9I zuTTkrD2l5Vu6SbM!+s0w4}-s+IP$=nzhbw<0R$#k6oiE{9#}H-!NeFn03fR%vW z(hG|&l?PW<3lu{DY^1LJc8D8*5D1|*ND~~S9n&Y};VJiMl!^v>LuOv~OYgsW?DSEY 
zOz!bhFu@BZ&s)E8t$$*7nZbj{56yaXjzX;kfwLnCARH?j(5v6uYc@tkMAqA_=W|bg z^8I_|rs9y`WR)TULR?|0EHuS9oFYQFt0plHSS*6(t-iLl`~1=19dr{K7TRAId8tDT+QeA>5 z>)deeEQ8CQTlvi|-?p}@`~$+`jIq-;u6{wraohL4x&OpojK!d!9zA;8kFnsnMig!7 z7vI9@vXg6iz`Z}Jvy(Vrb5Qb z@_)L>iz3%KMM1Ayx30sBKG?JKjSpU9IZ3vA6DXQqwQ$8h9$8acZ`%Cn+F#E6z|ahY zl*?toSfpr|UMEcp;_e^bYIJrhsyaROH2Txu&HGB1*+e`HC2eJ$o+ZSWVN!0thDV&xD{z-EfQ> z$2dcwp1NEa>-2kygAks&kah36O*W_NqxJ7i`qRX}Z+^C}p^<{rCnX!GamT%xlrvkpSq(BpoFQA61HY$>S^QWQrB5=DnjH$th< zd5DhC4ZG%0LSTriRVtBj!6~VWSyo91s;eu&L{O{ZXj)cY zekeG&heo4snrZ+5l@&+I$`85U1c8u_iyQy*nHrm|!S5(EM!2^je)ltvGc+~of$_`d zESWv-UYoK97M*T5%c!;5Z>H7}vlb$US1b)M+7keeAezt#m zX2JPRri>|y5kC@=Bhk92WPREkM0>@zYZDmb+xz!dNXsHI9Q(p2d(75)0Dz2T5cUzp z@WJ5QpS{nvp*1@X3Q}*bdD-Tu-}CFIZCii-fDj~;g-7Uyb$F$S@zB0~iG*(0*cAxb z{GWZE&vus&*R~~tdByqp1pyesqlf<~IwI<9?x`wsxx}WLgD92Jy?ZaGsE*$;AtW{; zLamXS%=ziWS*xv43x#OZb@rAnN=pV`{pcM|p_F(_0LYZv$I&#s`{x~jiYK}UrVt$5 zD<$OtLOPo%SIA^>-MdT7iJOeIJNEtR`Rw9b+5pWSkLUTrXO7X7)KV*oW@e6?dLsR3 zVR>%greG2p+RtE^Lv=+wjPbCvG>HS5;n*!-Um3{Z!P*w@qZxxaMR`tV2aa@y;04zs zbN>ne*nMzEpfn&tuBhBDN+)a57G@f9 zhfT_z8lRSTEuH;fb!}y}r7|EL5MwbueoFVKQB8hZmmrE_-(&-j#vsMeyN{h}d>=HW zkTtWEn=yDg{jBsy9x`x*Mx{y5JL_~w?arQ=HzwvzooW+t7HA{J`mE~qepu(Are6d=^h#? 
z(WZeE^?w;z0DvG>Gi&b}gIW1GP_pmEFmy_MQf_&Tr@AFEafUm|+ZhrG!zg6jHEl&v zj9eZnQJyA*O4frA!ra^<&u4GD`Wl0$GtT-QP-_APgkiaYs#@A@NXulwDJc(zg$?Rx zBod7#AtiND&t89x2p{UXm=GlJ^%CW2f?BDRab2k8@Clk@kEJ(uLrZ-_)0fv6EUqkr z(!xOqi?{Z@rE%IaTRah6LNLp!_4@H&y z?e-1=+MNf+xLa^=>w)a4h4l#^tw2~-V+H^;c$tLP?E4G38ML&89|$2sd@=yQYIk{C zMTB6jCZSKC6(z+#8cke=%zO|>$j=u9@u$N;`_C&gK`*{L_dauT;!6%FGtC#EF<-tdEKp zeJMRc5Jj;GiPH%nCet}D5=pnm$|t<>c*HyNG|r^!ty z_L^{rS{>8B|0;}8=Yt3xh($=KRtq{k9TP$@YPHrBg51g)06;=_UDJ107;LaL-~g6X zwkH{ii5Z)px0_tK9c*YQ$jsal8#`91jA9r$A;?{@fckNja@=;aB5W49i?SIyVK+p0MPCzzjSFE0hpoX-MXbkM&7Da>LmIh z645_7;fu8^eg29N;H~AaFsMjY(QH0%wPqW7%vY;o2*FLYTW%xcMsS){T>8YDAHY^6gkid{I|ui3@`9YZ z5+uJio&%x>X^N&h`yvQJE{0JiCeBPsoYAo2uW=j|7Fk?TB&AA~ zqUf_(=kJ?19m39+XE$D^)#!WoemFnxOPjsSol5ttr-p>!l9KPr%6@Wt$Iw%}OH*8M zx1N!a!{zcY2$@cHNq1DLb&?*|+8s4@4I)Be8WqbhHP(93y@J+U?<+(=NVq%eqE%jV zjju7-GchHVQS}ZO4Aj7d6KmF>JkG}*Ee)0UOfjyF@ zJo8FrWwmFm55u)gW?_WW;-VT|h6Y>rkSoESYkc)Jq!DS0Y$!fHq7%1skV3%5JH-wn1+VpOP97foW{NAC(OXVsIT?GYb~ZE7$jz7JXXH7&Z5_KlT!3W2oBOF$H!0Z zFfL7qa4GYBqv;$)vF!(R>l*woDr^FTFisZ^08o0lH(m@TCKF#d`7S{e(B+bXl;{YF z1}4UQw{EE{(_tkbBm@-~eRux+t7dZsA;@E3w!Y3(=eusGDeJ`(rua5OsJgbc z_1}mN*QcwOiEi!hanxQ}JP!aOi0HlzZ)aUDY$3~-fxQ6$yx4Z8Db3JJYA77QBPV|Y z0O%uv17M*0qq!Q*Ag9w@V?5pT0tAHWe2-${PGHguO?!GML%a84qJ69%4G#~E2@P?Z zGHv8Z004t}^_N(42LQO7pOs%y7!%fwWh6zsDag3jLHGj*p~%~tJ{v6-M@J?39U?JG z7aF8-@9WjrCjhAE;t5in${pSpKrKiv|eLg2g z;zW!^an8u0p82G`QJk94gJoHX{xM6l-~9T$&2Aghw}+I?G$3T6qemgXa&?4|(2zc! 
zL0VT|Ur|*hg+W3{SZKEiBL=&Zk+ib;+S%q~7J~c7&z8Ob3}gJ|{=MNrs-7_s5)>;@ zsbZ6o?(1r`0>)x&>;wo|PyYJ1^mK_CG%Sjb-aF3oxv;Xjg`#Vz^%hQ>CrHj30|0jK z|5^}*A%-|){2y1|~{w$0(FF`NAsz=O!hh-tSC^?cTKYngax z6@v}a-8FhVmQbYP!O3&SbBpp5b)g|@Ns5&a6r@e=)pJQFdm@BXbj+BmJ~hlMFL&6x zT*A#>otizEHgc$EDMY!^tZ0>N@exG%>cDr)pIaeuB?%^|M?&f^AAVCENbVh={mFub=GRRRHn=zw5_+V%yF>gwR#r;KS;KfRW27_fjT}TJ`%s zpJ&{GLYKhH*p@k49}hOX-{6QaT^F9u7br+y%FEpL#mD-HkUsIsR@Me;HlGs*xpn|2;tN!POWVDqf%1tS1#Bc@2yzGFf<{=;o^N-_&ykn z4Ab2jenFq}@;;gmg^IT}}TitEs8<<;isk)9)TN zB*llW$2&hzRko5eO-#y{4_-vWzl0d6p<% z38iG@=jRm_Nwfi1&6zqZE%nWvTPEExp0x6Xz0oFvy3p`h_e>8+V$51+S^nZG2;syb zY5qFP(CIS!3^WjeW8=ng?)nf2VZF_ED*d8F4`fi!h|8{O7fgYL&7jV}~g?wyP~d2yG1|tseqo6c;}+G^C$9BMR#p z8ooMs(C?jmJnqN15raN^>XD+dqI(~kw_wT~PdP$6ucSQ&H7fO|uWWY)!bOqE6#I|t zU%&M=jY2l_*1lag4=JZtsm)c8#H1PFVFNsIK!?lq-EY6O|FrFc5HNCJpUq3=JDiS% zYadOCH{3Do_G*h&?ki`~gbwzbiH(fTD$36<&i7BI7C3t9C`Ge(55L{eJ*=S0Y;y^n z$>%{xSuEM++Kb-yAj2s23DZJD`ns#as5+bN+apKpcDvv7iM(*vumM{iU*h6j_dmTX zw=nm>_PvOZb~NA>@jCSUYs|*Mvu?+}7KZwV2kyQ9g-4&ZIr&cyoiWwhI(%k|V)OI% zn9SZabFgR6#R^44Q!4$3-+tpI1Vk`GOQ+ub__PTQmt*`xvkHszKiRN#)R2)5m#eiK z*HXL=TMrKj+y0NY5eiHd8USGX7au(K@>3iQmfSroEiSUd>oFEA)=N$F5rUEpb5*J+ z4}FD|mERuv)#dUtCqoEPDC93Kp1W%1r1I+WF^eW=<)lx)``%GQMml++L;AX~@7Bf- z{`vaH{|wYJLlKGn(*}IHeoIJjNM5D!Uq{XZ2-+IaPNyj=YunZ0Y8(+hBrbjeA&6ty zgQrjD78Lj|1}~%}#=P_BVtr)9{-Xz1ZdzkBR`*XE@Z**}PF|3)OlNdLXBi|I-?Myn z`lWLnIj5ZkCOAkPwC#nrNAw>G0Qmgy*!z`y^+j_2QA&v9IEaLAF5eo(12o(N56n(D&-gAS+T zuEkUHOY)>I{`7z}z85@m+>AH=z8(NjVyxYNHmA&7$2GSZEtc$pf`5A$lyT~0!@^@3 zmy65F5JG;;8}5Dc(EJHw7K|C05E+q?d+CvvpUlioXIT!3;@9u|YiOTAPM(){y70ei zdtlaDEn}D7TUiyb5_=Diq9A;>X~%7YhqDZOskCbU*&L@JVnRIhSS&dO1$#VuP&e?w zMWzIV+T7@)y&Dn2vW&jF?wv;##YcwQ9QGHtY}&SayG*9=Y)i8D-7kjr8RQa#wl+rb zi7xNMdN%X?tS26@JL~~mdJO<@xf~;J8MR>I?5SfW001sm8gojGS!GoXPM#q1h4fFV z>s3~Ju*0RIs4fQmMG=cAE-q@)twSdb>1T-P&hxJKcJKK3n_VSk1sp4f6m&Z5uRpzc z?t~c*7cb}7E_H2x_5v=eD!*gVB)dHTmUaseKy+ccS5`hhX80&i<#SG1^}Bob@B96< zfB6=Mp~v4c;IV1rW5Pq-tAoW2@4x!te|CDCJ}#$y$NH_~N8jD0%|n~eWd@0Rm*SJ@ 
zXI5-@DzETzz|U};RA-Ml`2oi z6cP3Wp9O?G;!_l4s77;$W!R0{6*M}&k$>!L6rG2uFlF@OLe zRB1M`G@Y53ZL*j%FJC&Hai-2s=z=v;?l{L_n2F?Fk)WOdJl|)w3bM)#D+24-S{hKKah6Z$9<1OeU9p zG+SU!csX?9$iwTOG}jsfJ3$8=m>{J>wS3Of6$_S$2vLygaF6;95wJ6IEOpim+h5=M z*%pObbCXUlY!?vnF2~@0w`_QPU2j9r0Ge-LHoN`bKYz1++h&ugnv=&|a?|NFqV z8QB*gMcsfS*f7Do)2@#q+`|K^t;_x$p0X=Pb?MF}tPG)+?! z?T1(6iHu={FcPC<^l?#fBl--UGk$tp)Rlt{+S$D~u(%257ZXCP^;VO$_V~Gz$IhMj z@#y|Ks~J)>q$oF`l+U$&Zpsiwn2^T*DwOKs{cjmLaKzY=e+ml@4N_|v34VnX+?*4j zTfkChDJU(Z#stxD4;v1epRc;D2t#FzwKctq2m z->lQKJ@`W~=o^H%cpg9?3&gI(*AIVS2LDU&hjwHCEAamV00960HnNfcS=Onw00000 LNkvXXu0mjf9%N$h literal 0 HcmV?d00001 diff --git a/book/img/use_case_badge.svg b/book/img/use_case_badge.svg new file mode 100644 index 0000000..e101ce4 --- /dev/null +++ b/book/img/use_case_badge.svg @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/book/intro.md b/book/intro.md new file mode 100644 index 0000000..080ec22 --- /dev/null +++ b/book/intro.md @@ -0,0 +1,3 @@ +# Welcome to the GeoSMART Jupyter Book! + +**GeoS**cience **MA**chine Learning **R**esources and **T**raining (GeoSMART) framework provides an educational pathway that provides a foundation in open source scientific ecosystems and progresses through general ML theory, toolkits and deployment on Cloud computing. diff --git a/book/reference/bibliography.md b/book/reference/bibliography.md new file mode 100644 index 0000000..c4375e2 --- /dev/null +++ b/book/reference/bibliography.md @@ -0,0 +1,5 @@ +# Bibliography + +```{bibliography} +:style: plain +``` diff --git a/book/reference/glossary.md b/book/reference/glossary.md new file mode 100644 index 0000000..21f704d --- /dev/null +++ b/book/reference/glossary.md @@ -0,0 +1,65 @@ +# Glossaries + +## Tools and Technology + +```{glossary} +[Binder](https://mybinder.org) + A free, public service for running reproducible interactive computing + environments. 
Binder is a 100% open source infrastructure that is run by + members of the Jupyter community. The underlying technology behind the Binder + project is BinderHub. + +[BinderHub](https://binderhub.readthedocs.io) + The underlying technology of mybinder.org, BinderHub is an open source tool + that utilizes a JupyterHub in order to provide live, reproducible + interactive computing environments that users define on GitHub. + +[Conda](https://docs.conda.io) + Package, dependency and environment management for any language—Python, R, + Ruby, Lua, Scala, Java, JavaScript, C/C++, FORTRAN, and more. + +[Docker](https://www.docker.com) + Docker provides the ability to package and run an application in a loosely + isolated environment called a container. It is widely used for creating + reproducible software environments to run code on different computers. + +[Git](https://git-scm.com) + A popular version control system that is used in many open source software + projects to manage their software code base. + +[GitHub](https://github.com) + Provider of Internet hosting for software development and distributed version + control using the "git" command line tool. + +[Project Jupyter](https://jupyter.org) + Project Jupyter (name derived from "JUlia PYThon and R") exists to develop + open-source software, open-standards, and services for interactive computing + across dozens of programming languages. + +[Jupyter Book](https://jupyterbook.org/intro.html) + Jupyter Book is an open source project for building beautiful, + publication-quality books and documents from computational material. + +[JupyterHub](https://jupyterhub.readthedocs.io) + A core open source tool from the Jupyter community, JupyterHub allows you to + deploy an application that provides remote data science environments to + multiple users. It can be deployed in the cloud, or on your own hardware. 
+ +[JupyterLab](https://jupyterlab.readthedocs.io) + JupyterLab is the next-generation web-based user interface for Project Jupyter + intended to replace the Jupyter Notebook interface. + +[Jupyter Notebook](https://jupyter-notebook.readthedocs.io) + An open-source web application that allows you to create and share documents that + contain live code, equations, visualizations and narrative text. + +[MyST](https://myst-parser.readthedocs.io) + Markedly Structured Text (MyST) is a rich and extensible flavor of Markdown + meant for technical documentation and publishing. It is used by Jupyter Book. + +``` + +## Topical (Use Case Specific) +```{glossary} +< TODO: Fill Out > +``` \ No newline at end of file diff --git a/book/references.bib b/book/references.bib new file mode 100644 index 0000000..926a685 --- /dev/null +++ b/book/references.bib @@ -0,0 +1,33 @@ +@article {Huppenkothen2018, +author = {Huppenkothen, Daniela and Arendt, Anthony and Hogg, David W. and Ram, Karthik and VanderPlas, Jacob T. and Rokem, Ariel}, +title = {Hack weeks as a model for data science education and collaboration}, +volume = {115}, +number = {36}, +pages = {8872--8877}, +year = {2018}, +doi = {10.1073/pnas.1717196115}, +publisher = {National Academy of Sciences}, +issn = {0027-8424}, +journal = {Proceedings of the National Academy of Sciences} +} + +@article{huppenkothen_entrofy_2020, + title = {Entrofy your cohort: {A} transparent method for diverse cohort selection}, + volume = {15}, + issn = {1932-6203}, + shorttitle = {Entrofy your cohort}, + url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0231939}, + doi = {10.1371/journal.pone.0231939}, + abstract = {Selecting a cohort from a set of candidates is a common task within and beyond academia. Admitting students, awarding grants, and choosing speakers for a conference are situations where human biases may affect the selection of any particular candidate, and, thereby the composition of the final cohort. 
In this paper, we propose a new algorithm, entrofy, designed to be part of a human-in-the-loop decision making strategy aimed at making cohort selection as just, transparent, and accountable as possible. We suggest embedding entrofy in a two-step selection procedure. During a merit review, the committee selects all applicants, submissions, or other entities that meet their merit-based criteria. This often yields a cohort larger than the admissible number. In the second stage, the target cohort can be chosen from this meritorious pool via a new algorithm and software tool called entrofy. entrofy optimizes differences across an assignable set of categories selected by the human committee. Criteria could include academic discipline, home country, experience with certain technologies, or other quantifiable characteristics. The entrofy algorithm then yields the approximation of pre-defined target proportions for each category by solving the tie-breaking problem with provable performance guarantees. We show how entrofy selects cohorts according to pre-determined characteristics in simulated sets of applications and demonstrate its use in a case study of Astro Hack Week. This two stage candidate and cohort selection process allows human judgment and debate to guide the assessment of candidates’ merit in step 1. Then the human committee defines relevant diversity criteria which will be used as computational parameters in entrofy. Once the parameters are defined, the set of candidates who meet the minimum threshold for merit are passed through the entrofy cohort selection procedure in step 2 which yields a cohort of a composition as close as possible to the computational parameters defined by the committee. This process has the benefit of separating the meritorious assessment of candidates from certain elements of their diversity and from some considerations around cohort composition. 
It also increases the transparency and auditability of the process, which enables, but does not guarantee, fairness. Splitting merit and diversity considerations into their own assessment stages makes it easier to explain why a given candidate was selected or rejected, though it does not eliminate the possibility of objectionable bias.}, + language = {en}, + number = {7}, + urldate = {2021-01-04}, + journal = {PLOS ONE}, + author = {Huppenkothen, Daniela and McFee, Brian and Norén, Laura}, + month = jul, + year = {2020}, + note = {Publisher: Public Library of Science}, + keywords = {Algorithms, Careers, Culture, Decision making, Employment, Human learning, Statistical distributions, Workshops}, + pages = {e0231939}, + file = {Snapshot:C\:\\Users\\arend\\Zotero\\storage\\CYPGLSAA\\article.html:text/html}, +} diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..e167bce --- /dev/null +++ b/environment.yml @@ -0,0 +1,10 @@ +name: geosmart-template +channels: + - conda-forge +dependencies: + - python=3.10 + - sphinxcontrib-bibtex + - jupyter-book=0.13 + - jupytext + - plotly + - pandas