diff --git a/.github/workflows/build-wheels.yaml b/.github/workflows/build-wheels.yaml index c6860e1f6e6..98fddd006cc 100644 --- a/.github/workflows/build-wheels.yaml +++ b/.github/workflows/build-wheels.yaml @@ -47,6 +47,8 @@ jobs: steps: - name: Check out the repo uses: actions/checkout@v4 + with: + submodules: 'true' - name: Set up QEMU if: runner.os == 'Linux' diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index a90160eb962..cb0f7f065ae 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -19,6 +19,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + submodules: 'true' - name: Setup Python uses: actions/setup-python@v5 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 82a52c8ecfa..dbb435d0845 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,6 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 with: + submodules: 'true' fetch-depth: '2' - name: Setup Python diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000000..3842eea91bc --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "doc/visual-programming"] + path = doc/visual-programming + url = https://github.com/biolab/orange3-doc-visual-programming.git diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 2045da41085..f9ae170072e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -5,6 +5,9 @@ build: tools: python: "3.10" +submodules: + include: all + sphinx: # Path to the shared conf.py file. 
configuration: doc/conf.py diff --git a/doc/visual-programming b/doc/visual-programming new file mode 160000 index 00000000000..5118f013eda --- /dev/null +++ b/doc/visual-programming @@ -0,0 +1 @@ +Subproject commit 5118f013edaa721438e21824dec0c5faa25fc079 diff --git a/doc/visual-programming/Makefile b/doc/visual-programming/Makefile deleted file mode 100644 index 25039dfc369..00000000000 --- a/doc/visual-programming/Makefile +++ /dev/null @@ -1,192 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - -# Internal variables. 
-PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - -clean: - rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - 
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OrangeVisualProgramming.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OrangeVisualProgramming.qhc" - -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." 
- @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/OrangeVisualProgramming" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/OrangeVisualProgramming" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 
- -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/doc/visual-programming/make.bat b/doc/visual-programming/make.bat deleted file mode 100644 index 51e25089dba..00000000000 --- a/doc/visual-programming/make.bat +++ /dev/null @@ -1,263 +0,0 @@ -@ECHO OFF - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set BUILDDIR=build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source -set I18NSPHINXOPTS=%SPHINXOPTS% source -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% - set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% -) - -if "%1" == "" goto help - -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. 
dirhtml to make HTML files named index.html in directories - echo. singlehtml to make a single large HTML file - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. devhelp to make HTML files and a Devhelp project - echo. epub to make an epub - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. text to make text files - echo. man to make manual pages - echo. texinfo to make Texinfo files - echo. gettext to make PO message catalogs - echo. changes to make an overview over all changed/added/deprecated items - echo. xml to make Docutils-native XML files - echo. pseudoxml to make pseudoxml-XML files for display purposes - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - echo. coverage to run coverage check of the documentation if enabled - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - - -REM Check if sphinx-build is available and fallback to Python version if any -%SPHINXBUILD% 2> nul -if errorlevel 9009 goto sphinx_python -goto sphinx_ok - -:sphinx_python - -set SPHINXBUILD=python -m sphinx.__init__ -%SPHINXBUILD% 2> nul -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -:sphinx_ok - - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. 
- goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. - goto end -) - -if "%1" == "singlehtml" ( - %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) - -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\OrangeVisualProgramming.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\OrangeVisualProgramming.ghc - goto end -) - -if "%1" == "devhelp" ( - %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. - goto end -) - -if "%1" == "epub" ( - %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub file is in %BUILDDIR%/epub. - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - if errorlevel 1 exit /b 1 - echo. 
- echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdf" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdfja" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf-ja - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "text" ( - %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The text files are in %BUILDDIR%/text. - goto end -) - -if "%1" == "man" ( - %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The manual pages are in %BUILDDIR%/man. - goto end -) - -if "%1" == "texinfo" ( - %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. - goto end -) - -if "%1" == "gettext" ( - %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The message catalogs are in %BUILDDIR%/locale. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - if errorlevel 1 exit /b 1 - echo. - echo.The overview file is in %BUILDDIR%/changes. - goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - if errorlevel 1 exit /b 1 - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - if errorlevel 1 exit /b 1 - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. 
- goto end -) - -if "%1" == "coverage" ( - %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage - if errorlevel 1 exit /b 1 - echo. - echo.Testing of coverage in the sources finished, look at the ^ -results in %BUILDDIR%/coverage/python.txt. - goto end -) - -if "%1" == "xml" ( - %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The XML files are in %BUILDDIR%/xml. - goto end -) - -if "%1" == "pseudoxml" ( - %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. - goto end -) - -:end diff --git a/doc/visual-programming/source/_static/style.css b/doc/visual-programming/source/_static/style.css deleted file mode 100644 index 6dddc8f9df0..00000000000 --- a/doc/visual-programming/source/_static/style.css +++ /dev/null @@ -1,37 +0,0 @@ -p + dl { - border-top: 2px solid gray; - border-bottom: 2px solid gray; - padding: 12px; -} - -p + dl dd:last-of-type { - margin-bottom: 0; -} - -p + dl dt { - font-weight: bold; -} - -p + dl dt::after { - content: ":"; -} - -dd dt { - font-weight: bold; - display: inline-block; -} - -dd dt::after { - content: ":"; -} - -dd dd { - display: inline; - margin: 0; - } - -dd dd:after{ - display: block; - content: ''; - } - diff --git a/doc/visual-programming/source/building-workflows/DataTable-wrong.png b/doc/visual-programming/source/building-workflows/DataTable-wrong.png deleted file mode 100644 index 7bbaed507c4..00000000000 Binary files a/doc/visual-programming/source/building-workflows/DataTable-wrong.png and /dev/null differ diff --git a/doc/visual-programming/source/building-workflows/file-datatable.gif b/doc/visual-programming/source/building-workflows/file-datatable.gif deleted file mode 100644 index 603858248f4..00000000000 Binary files a/doc/visual-programming/source/building-workflows/file-datatable.gif and /dev/null differ diff --git 
a/doc/visual-programming/source/building-workflows/file.gif b/doc/visual-programming/source/building-workflows/file.gif deleted file mode 100644 index bb37a8d3ee9..00000000000 Binary files a/doc/visual-programming/source/building-workflows/file.gif and /dev/null differ diff --git a/doc/visual-programming/source/building-workflows/index.md b/doc/visual-programming/source/building-workflows/index.md deleted file mode 100644 index 168bdb63baf..00000000000 --- a/doc/visual-programming/source/building-workflows/index.md +++ /dev/null @@ -1,43 +0,0 @@ -# Building Workflows - -The core principle of Orange is visual programming, which means each analytical step in contained within a widget. Widgets are placed on the canvas and connected into an analytical workflow, which is executed from left to right. Orange never passes data backwards. - -## Simple workflow - -Let us start with a simple workflow. We will load the data with the File widget, say the famous *Iris* data set. Right-click on the canvas. A menu will appear. Start typing "File", then press Enter to confirm the selection. [File](../widgets/data/file.md) widget will be placed on the canvas. - -![](file.gif) - -**File** widget has an "ear" on its right side – this is the output of the widget. Click on the "ear" and drag a connection out of it. Upon releasing the connection, a menu will appear. Start typing the name of the widget to connect with the File widget, say Data Table. Select the widget and press enter. The widget is added to the canvas. - -![](file-datatable.gif) - -This is a simple workflow. The File widget loads the data and sends it to the output. Data Table receives the data and displays it in a table. Please note that Data Table is a viewer and passes onwards only the selection. The data is always available at the source - in the File widget. 
- -![](DataTable-wrong.png) - -## Workflows with subsets - -Visualizations in Orange are interactive, which means the user can select data instances from the plot and pass them downstream. Let us look at two examples with subsets. - -### Selecting subsets - -Place **File** widget on the canvas. Then connect [Scatter Plot](../widgets/visualize/scatterplot.md) to it. Click and drag a rectangle around a subset of points. Connect [Data Table](../widgets/data/datatable.md) to Scatter Plot. Data Table will show selected points. - -![](subset-selection.gif) - -### Highlighting workflows - -Place **File** widget on the canvas. Then connect **Scatter Plot** to it and a **Data Table**. Connect Data Table to Scatter Plot. Select a subset of points from the Data Table. Scatter Plot will highlight selected points. - -![](subset-highlight.gif) - -## Workflows with models - -Predictive models are evaluated in [Test and Score](../widgets/evaluate/testandscore.md) widget, while predictions on new data are done in [Predictions](../widgets/evaluate/predictions.md). Test and Score accepts several inputs: data (data set for evaluating models), learners (algorithms to use for training the model), and an optional preprocessor (for normalization or feature selection). - -![](prediction-workflow.png) - -For prediction, the training data is first passed to the model. Once the model is trained, it is passed to **Predictions**. The Predictions widget also needs data to predict on, which are passed as a second input. 
- -![](prediction-workflow2.png) diff --git a/doc/visual-programming/source/building-workflows/prediction-workflow.png b/doc/visual-programming/source/building-workflows/prediction-workflow.png deleted file mode 100644 index 19e8e19a578..00000000000 Binary files a/doc/visual-programming/source/building-workflows/prediction-workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/building-workflows/prediction-workflow2.png b/doc/visual-programming/source/building-workflows/prediction-workflow2.png deleted file mode 100644 index 081c2462cc9..00000000000 Binary files a/doc/visual-programming/source/building-workflows/prediction-workflow2.png and /dev/null differ diff --git a/doc/visual-programming/source/building-workflows/subset-highlight.gif b/doc/visual-programming/source/building-workflows/subset-highlight.gif deleted file mode 100644 index 33ee691948d..00000000000 Binary files a/doc/visual-programming/source/building-workflows/subset-highlight.gif and /dev/null differ diff --git a/doc/visual-programming/source/building-workflows/subset-selection.gif b/doc/visual-programming/source/building-workflows/subset-selection.gif deleted file mode 100644 index caafa31a079..00000000000 Binary files a/doc/visual-programming/source/building-workflows/subset-selection.gif and /dev/null differ diff --git a/doc/visual-programming/source/conf.py b/doc/visual-programming/source/conf.py deleted file mode 100644 index a0ea565a078..00000000000 --- a/doc/visual-programming/source/conf.py +++ /dev/null @@ -1,385 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# Orange Visual Programming documentation build configuration file, created by -# sphinx-quickstart on Fri Nov 27 12:05:51 2015. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. 
-# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) - -sys.path.append(os.path.abspath("../../..")) -sys.path.append(os.path.abspath(".")) - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.intersphinx", - "sphinx.ext.todo", - "sphinx.ext.imgmath", - "sphinx.ext.ifconfig", - "sphinx.ext.viewcode", - "recommonmark", -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -source_suffix = [".md", ".rst"] - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - -# General information about the project. -project = "Orange Visual Programming" -copyright = "2015, Orange Data Mining" -author = "Orange Data Mining" - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = "3" -# The full version, including alpha/beta/rc tags. -release = "3" - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. 
-# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = "english" - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = "alabaster" - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. 
If None, it defaults to -# " v documentation". -# html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# html_extra_path = [] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -# html_domain_indices = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. 
Default is True. -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' -# html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# Now only 'ja' uses this config value -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = "OrangeVisualProgrammingdoc" - -# -- Options for LaTeX output --------------------------------------------- - -# latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', -# } - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - ( - master_doc, - "OrangeVisualProgramming.tex", - "Orange Visual Programming Documentation", - "Orange Data Mining", - "manual", - ) -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. 
-# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False - -# If true, show page references after internal links. -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ( - master_doc, - "orangevisualprogramming", - "Orange Visual Programming Documentation", - [author], - 1, - ) -] - -# If true, show URL addresses after external links. -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - master_doc, - "OrangeVisualProgramming", - "Orange Visual Programming Documentation", - author, - "OrangeVisualProgramming", - "One line description of project.", - "Miscellaneous", - ) -] - -# Documents to append as an appendix to all manuals. -# texinfo_appendices = [] - -# If false, no module index is generated. -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -# texinfo_no_detailmenu = False - - -# -- Options for Epub output ---------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = project -epub_author = author -epub_publisher = author -epub_copyright = copyright - -# The basename for the epub file. It defaults to the project name. 
-# epub_basename = project - -# The HTML theme for the epub output. Since the default themes are not optimized -# for small screen space, using the same theme for HTML and epub output is -# usually not wise. This defaults to 'epub', a theme designed to save visual -# space. -# epub_theme = 'epub' - -# The language of the text. It defaults to the language option -# or 'en' if the language is not set. -# epub_language = '' - -# The scheme of the identifier. Typical schemes are ISBN or URL. -# epub_scheme = '' - -# The unique identifier of the text. This can be an ISBN number -# or the project homepage. -# epub_identifier = '' - -# A unique identification for the text. -# epub_uid = '' - -# A tuple containing the cover image and cover page html template filenames. -# epub_cover = () - -# A sequence of (type, uri, title) tuples for the guide element of content.opf. -# epub_guide = () - -# HTML files that should be inserted before the pages created by sphinx. -# The format is a list of tuples containing the path and title. -# epub_pre_files = [] - -# HTML files that should be inserted after the pages created by sphinx. -# The format is a list of tuples containing the path and title. -# epub_post_files = [] - -# A list of files that should not be packed into the epub file. -epub_exclude_files = ["search.html"] - -# The depth of the table of contents in toc.ncx. -# epub_tocdepth = 3 - -# Allow duplicate toc entries. -# epub_tocdup = True - -# Choose between 'default' and 'includehidden'. -# epub_tocscope = 'default' - -# Fix unsupported image types using the Pillow. -# epub_fix_images = False - -# Scale large images. -# epub_max_image_width = 0 - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# epub_show_urls = 'inline' - -# If false, no index is generated. -# epub_use_index = True - - -# Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} - diff --git a/doc/visual-programming/source/exporting-models/index.md b/doc/visual-programming/source/exporting-models/index.md deleted file mode 100644 index cbc7ff76137..00000000000 --- a/doc/visual-programming/source/exporting-models/index.md +++ /dev/null @@ -1,33 +0,0 @@ -# Exporting Models - -Predictive models can be saved and re-used. Models are saved in Python [pickle](https://docs.python.org/3/library/pickle.html) format. - -![](load-save-model.png) - -## Save model - -Models first require data for training. They output a trained model, which can be saved with [Save Model](../widgets/model/savemodel.md) widget in the pickle format. - -## Load model - -Models can be reused in different Orange workflows. [Load Model](../widgets/model/loadmodel.md) loads a trained model, which can be used in [Predictions](../widgets/evaluate/predictions.md) and elsewhere. - -## Load in Python - -Models can also be imported directly into Python and used in a script. 
- -```python -import pickle - -with open('model.pkcls', 'rb') as model: - lr = pickle.load(model) - -lr ->> LogisticRegressionClassifier(skl_model=LogisticRegression(C=1, - class_weight=None, dual=False, - fit_intercept=True, intercept_scaling=1.0, - l1_ratio=None, max_iter=10000, - multi_class='auto', n_jobs=1, penalty='l2', - random_state=0, solver='lbfgs', tol=0.0001, - verbose=0, warm_start=False)) -``` diff --git a/doc/visual-programming/source/exporting-models/load-save-model.png b/doc/visual-programming/source/exporting-models/load-save-model.png deleted file mode 100644 index 592e1286c86..00000000000 Binary files a/doc/visual-programming/source/exporting-models/load-save-model.png and /dev/null differ diff --git a/doc/visual-programming/source/exporting-visualizations/index.md b/doc/visual-programming/source/exporting-visualizations/index.md deleted file mode 100644 index 771b89cc498..00000000000 --- a/doc/visual-programming/source/exporting-visualizations/index.md +++ /dev/null @@ -1,47 +0,0 @@ -# Exporting Visualizations - -Visualizations are an essential part of data science, and analytical reports are incomplete without them. Orange provides a couple of options for saving and modifying visualizations. - -At the bottom of each widget, there is a status bar. Visualization widgets have a Save icon (second from the left) and a Palette icon (fourth from the left). Save icon saves the plot to the computer. Palette icon opens a dialogue for modifying visualizations. - -![](statusbar-viz.png) - -## Saving a plot - -Visualizations in Orange can be saved in several formats, namely .png, .svg, .pdf, .pdf from matplotlib and as a matplotlib Python code. A common option is saving in .svg (scalable vector graphic), which you can edit with a vector graphics software such as [Inkscape](https://inkscape.org/). Ctrl+C (cmd+C) will copy a .png plot, which you can import with ctrl+V (cmd+V) into Word, PowerPoint, or other software tools. 
- -![](plot-format.png) - -[Matplotlib](https://matplotlib.org/) Python code is ideal for detailed editing and a high customization level. Below is an example of the Python code. It is possible to adjust the colors, size of the symbols, markers, etc. - -```python -import matplotlib.pyplot as plt -from numpy import array - -plt.clf() - -# data -x = array([1.4, 1.4, 1.3, 1.5, 1.4]) -y = array([0.2, 0.7, 0.9, 0.2, 0.1]) -# style -sizes = 13.5 -edgecolors = ['#3a9ed0ff', '#c53a27ff'] -edgecolors_index = array([0, 0, 1, 1, 1], dtype='int') -facecolors = ['#46befa80', '#ed462f80'] -facecolors_index = array([0, 0, 1, 1, 1], dtype='int') -linewidths = 1.5 -plt.scatter(x=x, y=y, s=sizes**2/4, marker='o', - facecolors=array(facecolors)[facecolors_index], - edgecolors=array(edgecolors)[edgecolors_index], - linewidths=linewidths) -plt.xlabel('petal length') -plt.ylabel('petal width') - -plt.show() -``` - -## Modifying a plot - -It is possible to modify certain parameters of a plot without digging into the code. Click on the Palette icon to open visual settings. One can change various attributes of the plot, such as fonts, font sizes, titles and so on. 
- -![](plot-options.png) diff --git a/doc/visual-programming/source/exporting-visualizations/plot-format.png b/doc/visual-programming/source/exporting-visualizations/plot-format.png deleted file mode 100644 index 8b18bb8b7da..00000000000 Binary files a/doc/visual-programming/source/exporting-visualizations/plot-format.png and /dev/null differ diff --git a/doc/visual-programming/source/exporting-visualizations/plot-options.png b/doc/visual-programming/source/exporting-visualizations/plot-options.png deleted file mode 100644 index 4c62c0d426d..00000000000 Binary files a/doc/visual-programming/source/exporting-visualizations/plot-options.png and /dev/null differ diff --git a/doc/visual-programming/source/exporting-visualizations/statusbar-viz.png b/doc/visual-programming/source/exporting-visualizations/statusbar-viz.png deleted file mode 100644 index c40e6ce757b..00000000000 Binary files a/doc/visual-programming/source/exporting-visualizations/statusbar-viz.png and /dev/null differ diff --git a/doc/visual-programming/source/index.rst b/doc/visual-programming/source/index.rst deleted file mode 100644 index e80687c34cc..00000000000 --- a/doc/visual-programming/source/index.rst +++ /dev/null @@ -1,167 +0,0 @@ -========================= -Orange Visual Programming -========================= - -Getting Started -=============== - -Here we need to copy the getting started guide. - -.. toctree:: - :maxdepth: 1 - - loading-your-data/index - building-workflows/index - exporting-models/index - exporting-visualizations/index - learners-as-scorers/index - report/index - -Widgets -======= - -Data ----- - -.. 
toctree:: - :maxdepth: 1 - - widgets/data/file - widgets/data/csvfileimport - widgets/data/datasets - widgets/data/sqltable - widgets/data/save - widgets/data/datainfo - widgets/data/aggregatecolumns - widgets/data/datatable - widgets/data/selectcolumns - widgets/data/selectrows - widgets/data/datasampler - widgets/data/transpose - widgets/data/discretize - widgets/data/continuize - widgets/data/createinstance - widgets/data/createclass - widgets/data/randomize - widgets/data/concatenate - widgets/data/select-by-data-index - widgets/data/paintdata - widgets/data/pivot - widgets/data/pythonscript - widgets/data/formula - widgets/data/editdomain - widgets/data/impute - widgets/data/mergedata - widgets/data/outliers - widgets/data/preprocess - widgets/data/applydomain - widgets/data/purgedomain - widgets/data/rank - widgets/data/correlations - widgets/data/color - widgets/data/featurestatistics - widgets/data/melt - widgets/data/neighbors - widgets/data/unique - widgets/data/groupby - - -Visualize ---------- - -.. toctree:: - :maxdepth: 1 - - widgets/visualize/boxplot - widgets/visualize/violinplot - widgets/visualize/distributions - widgets/visualize/heatmap - widgets/visualize/scatterplot - widgets/visualize/lineplot - widgets/visualize/barplot - widgets/visualize/venndiagram - widgets/visualize/linearprojection - widgets/visualize/sievediagram - widgets/visualize/pythagoreantree - widgets/visualize/pythagoreanforest - widgets/visualize/cn2ruleviewer - widgets/visualize/mosaicdisplay - widgets/visualize/silhouetteplot - widgets/visualize/treeviewer - widgets/visualize/nomogram - widgets/visualize/scoringsheetviewer - widgets/visualize/freeviz - widgets/visualize/radviz - - -Model ------ - -.. 
toctree:: - :maxdepth: 1 - - widgets/model/constant - widgets/model/cn2ruleinduction - widgets/model/calibratedlearner - widgets/model/knn - widgets/model/tree - widgets/model/randomforest - widgets/model/gradientboosting - widgets/model/svm - widgets/model/linearregression - widgets/model/pls - widgets/model/logisticregression - widgets/model/naivebayes - widgets/model/scoringsheet - widgets/model/adaboost - widgets/model/curvefit - widgets/model/neuralnetwork - widgets/model/stochasticgradient - widgets/model/stacking - widgets/model/loadmodel - widgets/model/savemodel - - -Evaluate --------- - -.. toctree:: - :maxdepth: 1 - - widgets/evaluate/calibrationplot - widgets/evaluate/confusionmatrix - widgets/evaluate/performancecurve - widgets/evaluate/predictions - widgets/evaluate/rocanalysis - widgets/evaluate/testandscore - widgets/evaluate/permutationplot - widgets/evaluate/parameterfitter - - -.. toctree:: - :maxdepth: 1 - - -Unsupervised ------------- - -.. toctree:: - :maxdepth: 1 - - widgets/unsupervised/PCA - widgets/unsupervised/correspondenceanalysis - widgets/unsupervised/distancemap - widgets/unsupervised/distances - widgets/unsupervised/distancematrix - widgets/unsupervised/distancetransformation - widgets/unsupervised/distancefile - widgets/unsupervised/savedistancematrix - widgets/unsupervised/hierarchicalclustering - widgets/unsupervised/kmeans - widgets/unsupervised/louvainclustering - widgets/unsupervised/DBSCAN - widgets/unsupervised/mds - widgets/unsupervised/tsne - widgets/unsupervised/manifoldlearning - widgets/unsupervised/selforganizingmap - diff --git a/doc/visual-programming/source/learners-as-scorers/index.md b/doc/visual-programming/source/learners-as-scorers/index.md deleted file mode 100644 index 3e260c54051..00000000000 --- a/doc/visual-programming/source/learners-as-scorers/index.md +++ /dev/null @@ -1,14 +0,0 @@ -# Learners as Scorers - -Certain learners can be used as feature scorers in Orange. 
Here's a quick example with [Random Forest](../widgets/model/randomforest.md). - -We are using the *iris* data for the example. Connect [File](../widgets/data/file.md) with [Rank](../widgets/data/rank.md). Then connect **Random Forest** to Rank. Random Forest will be used as a Scorer in this case. Rank will use Random Forest's feature importance to rank the attributes. - -![](scoring-with-RF.png) - -Passing additional scorers works for both, classification and regression: - -- [Logistic Regression](../widgets/model/logisticregression.md) (classification) / [Linear Regression](../widgets/model/linearregression.md) (regression) -- [Stochastic Gradient Descent](../widgets/model/stochasticgradient.md) -- [Gradient Boosting](../widgets/model/gradientboosting.md) -- Random Forest diff --git a/doc/visual-programming/source/learners-as-scorers/scoring-with-RF.png b/doc/visual-programming/source/learners-as-scorers/scoring-with-RF.png deleted file mode 100644 index 11d7ad1c9f9..00000000000 Binary files a/doc/visual-programming/source/learners-as-scorers/scoring-with-RF.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/File-Google-Sheet.png b/doc/visual-programming/source/loading-your-data/File-Google-Sheet.png deleted file mode 100644 index 33ef1c13f4a..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/File-Google-Sheet.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/File-set-feature-kind.png b/doc/visual-programming/source/loading-your-data/File-set-feature-kind.png deleted file mode 100644 index b17c293a80c..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/File-set-feature-kind.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/File.png b/doc/visual-programming/source/loading-your-data/File.png deleted file mode 100644 index 74ed28e13ff..00000000000 Binary files 
a/doc/visual-programming/source/loading-your-data/File.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/data-table-for-select-columns.png b/doc/visual-programming/source/loading-your-data/data-table-for-select-columns.png deleted file mode 100644 index cdcd70afef8..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/data-table-for-select-columns.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/data-table-regression1.png b/doc/visual-programming/source/loading-your-data/data-table-regression1.png deleted file mode 100644 index e6c29d6e5ea..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/data-table-regression1.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/data-table-with-class1.png b/doc/visual-programming/source/loading-your-data/data-table-with-class1.png deleted file mode 100644 index b054840ccb0..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/data-table-with-class1.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/excel-with-tab1.png b/doc/visual-programming/source/loading-your-data/excel-with-tab1.png deleted file mode 100644 index f0da3cd1ce3..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/excel-with-tab1.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/file-browse.png b/doc/visual-programming/source/loading-your-data/file-browse.png deleted file mode 100644 index efe1b766b11..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/file-browse.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/file-browser-icon.png b/doc/visual-programming/source/loading-your-data/file-browser-icon.png deleted file mode 100644 index 36d3aac5e3b..00000000000 Binary files 
a/doc/visual-programming/source/loading-your-data/file-browser-icon.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/file-data-table-workflow.png b/doc/visual-programming/source/loading-your-data/file-data-table-workflow.png deleted file mode 100644 index 991c02e38fe..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/file-data-table-workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/index.md b/doc/visual-programming/source/loading-your-data/index.md deleted file mode 100644 index de4faa32244..00000000000 --- a/doc/visual-programming/source/loading-your-data/index.md +++ /dev/null @@ -1,131 +0,0 @@ -# Loading your Data - -Orange comes with its [own data format](https://docs.biolab.si/3/data-mining-library/tutorial/data.html#data-input), but can also handle native Excel, comma- or tab-delimited data files. The input data set is usually a table, with data instances (samples) in rows and data attributes in columns. Attributes can be of different *types* (numeric, categorical, datetime, and text) and have assigned *roles* (input features, meta attributes, and class). Data attribute type and role can be provided in the data table header. They can also be changed in the [File](../widgets/data/file.md) widget, while data role can also be modified with [Select Columns](../widgets/data/selectcolumns.md) widget. - -### In a Nutshell - -- Orange can import any comma- or tab-delimited data file, or Excel's native files or Google Sheets document. Use [File](../widgets/data/file.md) widget to load the data and, if needed, define the class and meta attributes. -- Types and roles can be set in the File widget. -- Attribute names in the column header can be preceded with a label followed by a hash. Use c for class and m for meta attribute, i to ignore a column, w for weights column, and C, D, T, S for continuous, discrete, time, and string attribute types. 
Examples: C\#mph, mS\#name, i\#dummy. -- An alternative to the hash notation is Orange's native format with three header rows: the first with attribute names, the second specifying the type (**continuous**, **discrete**, **time**, or **string**), and the third providing information on the attribute role (**class**, **meta**, **weight** or **ignore**). - -## Data from Excel - -Here is an example dataset ([sample.xlsx](http://file.biolab.si/datasets/sample.xlsx)) as entered in Excel: - -![](spreadsheet1.png) - -The file contains a header row, eight data instances (rows) and seven data attributes (columns). Empty cells in the table denote missing data entries. Rows represent genes; their function (class) is provided in the first column and their name in the second. The remaining columns store measurements that characterize each gene. With this data, we could, say, develop a classifier that would predict gene function from its characteristic measurements. - -Let us start with a simple workflow that reads the data and displays it in a table: - -![](file-data-table-workflow.png) - -To load the data, open the File widget (double click on the icon of the widget), click on the file browser icon ("...") and locate the downloaded file (called [sample.xlsx](http://file.biolab.si/datasets/sample.xlsx)) on your disk: - -![](File.png) - -### File Widget: Setting the Attribute Type and Role - -The **File** widget sends the data to the **Data Table**. Double click the **Data Table** to see its contents: - -![](table-widget.png) - -Orange correctly assumed that a column with gene names is meta information, which is displayed in the **Data Table** in columns shaded with light-brown. It has not guessed that *function*, the first non-meta column in our data file, is a class column. To correct this in Orange, we can adjust attribute role in the column display of File widget (below). Double-click the *feature* label in the *function* row and select *target* instead. 
This will set *function* attribute as our target (class) variable. - -![](File-set-feature-kind.png) - -You can also change attribute type from nominal to numeric, from string to datetime, and so on. Naturally, data values have to suit the specified attribute type. Datetime accepts only values in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format, e.g. 2016-01-01 16:16:01. Orange would also assume the attribute is numeric if it has several different values, else it would be considered nominal. All other types are considered strings and are as such automatically categorized as meta attributes. - -Change of attribute roles and types should be confirmed by clicking the **Apply** button. - -### Select Columns: Setting the Attribute Role - -Another way to set the data role is to feed the data to the [Select Columns](../widgets/data/selectcolumns.md) widget: - -![](select-columns-schema.png) - -Opening [Select Columns](../widgets/data/selectcolumns.md) reveals Orange's classification of attributes. We would like all of our continuous attributes to be data features, gene function to be our target variable and gene names considered as meta attributes. We can obtain this by dragging the attribute names around the boxes in **Select Columns**: - -![](select-columns-start.png) - -To correctly reassign attribute types, drag attribute named *function* to a **Class** box, and attribute named *gene* to a **Meta Attribute** box. The [Select Columns](../widgets/data/selectcolumns.md) widget should now look like this: - -![](select-columns-reassigned.png) - -Change of attribute types in *Select Columns* widget should be confirmed by clicking the **Apply** button. The data from this widget is fed into [Data Table](../widgets/data/datatable.md) that now renders the data just the way we intended: - -![](data-table-with-class1.png) - -We could also define the domain for this dataset in a different way. 
Say, we could make the dataset ready for regression, and use *heat 0* as a continuous class variable, keep gene function and name as meta variables, and remove *heat 10* and *heat 20* from the dataset: - -![](select-columns-regression.png) - -By setting the attributes as above, the rendering of the data in the -Data Table widget gives the following output: - -![](data-table-regression1.png) - -## Header with Attribute Type Information - -Consider again the [sample.xlsx](http://file.biolab.si/datasets/sample.xlsx) dataset. This time we will augment the names of the attributes with prefixes that define attribute type (continuous, discrete, time, string) and role (class or meta attribute). Prefixes are separated from the attribute name with a hash sign ("\#"). Prefixes for attribute roles are: - -- c: class attribute -- m: meta attribute -- i: ignore the attribute -- w: instance weights - -and for the type: - -- C: Continuous -- D: Discrete -- T: Time -- S: String - -This is how the header with augmented attribute names looks like in Excel ([sample-head.xlsx](http://file.biolab.si/datasets/sample-head.xlsx)): - -![](spreadsheet-simple-head1.png) - -We can again use a **File** widget to load this dataset and then render it in the **Data Table**: - -![](select-cols-simplified-header.png) - -Notice that the attributes we have ignored (label "i" in the attribute name) are not present in the dataset. - -## Three-Row Header Format - -Orange's legacy native data format is a tab-delimited text file with three header rows. The first row lists the attribute names, the second row defines their type (continuous, discrete, time and string, or abbreviated c, d, t, and s), and the third row an optional role (class, meta, weight, or ignore). Here is an example: - -![](excel-with-tab1.png) - -Data from Google Sheets ------------------------ - -Orange can read data from Google Sheets, as long as it conforms to the data presentation rules we have presented above. 
In Google Sheets, copy the shareable link (Share button, then Get shareable link) and paste it in the *Data File / URL* box of the File widget. For a taste, here's one such link you can use: [http://bit.ly/1J12Tdp](http://bit.ly/1J12Tdp), and the way we have entered it in the **File** widget: - -![](File-Google-Sheet.png) - -## Data from LibreOffice - -If you are using LibreOffice, simply save your files in Excel (.xlsx) format (available from the drop-down menu under *Save As Type*). - -![](saving-tab-delimited-files.png) - -## Datetime Format - -To avoid ambiguity, Orange supports date and/or time formatted in one of the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats. For example, the following values are all valid: - - 2016 - 2016-12-27 - 2016-12-27 14:20:51 - 16:20 - -## Attributes of attributes - -In the third row of the header, one can specify additional attribute information. For example: - - - *type=image*: signifies a column with a path to the image (local or URL) - - *include=True*: signifies a column that is considered a text feature (can also be set in the **Corpus** widget) - - *title=True*: signifies a column used for titles in document list, i.e. **Corpus Viewer** - -These attributes can be edited with **Edit Domain**. 
diff --git a/doc/visual-programming/source/loading-your-data/sample-head.xlsx b/doc/visual-programming/source/loading-your-data/sample-head.xlsx deleted file mode 100644 index cab83c7df1f..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/sample-head.xlsx and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/sample.csv b/doc/visual-programming/source/loading-your-data/sample.csv deleted file mode 100644 index ba64e664545..00000000000 --- a/doc/visual-programming/source/loading-your-data/sample.csv +++ /dev/null @@ -1 +0,0 @@ -function,gene,spo-early,spo-mid,heat 0,heat 10,heat 20 Proteas,YDR427W,0.301,0.546,,-0.009,0.024 Proteas,YGL048C,0.208,,-0.061,-0.039,0.003 Resp,YBR039W,-0.179,-0.219,-0.097,,-0.011 Ribo,YKL180W,-0.085,-0.161,-0.061,-0.265,-0.419 Ribo,YHR021C,-0.216,-0.253,-0.228,-0.168,-0.228 Resp,YDR178W,0.017,0.07,0.058,0.286,0.205 Resp,YLL041C,0.115,,0.033,0.262,0.054 Resp,YOR065W,0.005,-0.023,-0.038,0.222,0.088 \ No newline at end of file diff --git a/doc/visual-programming/source/loading-your-data/sample.xlsx b/doc/visual-programming/source/loading-your-data/sample.xlsx deleted file mode 100644 index 598473a54d2..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/sample.xlsx and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/saving-tab-delimited-files.png b/doc/visual-programming/source/loading-your-data/saving-tab-delimited-files.png deleted file mode 100644 index 5a28fd29e8d..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/saving-tab-delimited-files.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/select-attributes-schema.png b/doc/visual-programming/source/loading-your-data/select-attributes-schema.png deleted file mode 100644 index 98a1ff3654e..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/select-attributes-schema.png and /dev/null differ diff --git 
a/doc/visual-programming/source/loading-your-data/select-cols-simplified-header.png b/doc/visual-programming/source/loading-your-data/select-cols-simplified-header.png deleted file mode 100644 index c448b706493..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/select-cols-simplified-header.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/select-columns-reassigned.png b/doc/visual-programming/source/loading-your-data/select-columns-reassigned.png deleted file mode 100644 index 1fc5eddad1f..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/select-columns-reassigned.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/select-columns-regression.png b/doc/visual-programming/source/loading-your-data/select-columns-regression.png deleted file mode 100644 index f5fde3e5e1f..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/select-columns-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/select-columns-schema.png b/doc/visual-programming/source/loading-your-data/select-columns-schema.png deleted file mode 100644 index ecaeca29cb0..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/select-columns-schema.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/select-columns-start.png b/doc/visual-programming/source/loading-your-data/select-columns-start.png deleted file mode 100644 index b267e8c312d..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/select-columns-start.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/spreadsheet-simple-head1.png b/doc/visual-programming/source/loading-your-data/spreadsheet-simple-head1.png deleted file mode 100644 index e9bc80ce557..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/spreadsheet-simple-head1.png and 
/dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/spreadsheet1.png b/doc/visual-programming/source/loading-your-data/spreadsheet1.png deleted file mode 100644 index 53fd39dffc6..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/spreadsheet1.png and /dev/null differ diff --git a/doc/visual-programming/source/loading-your-data/table-widget.png b/doc/visual-programming/source/loading-your-data/table-widget.png deleted file mode 100644 index d56979ad62b..00000000000 Binary files a/doc/visual-programming/source/loading-your-data/table-widget.png and /dev/null differ diff --git a/doc/visual-programming/source/report/index.md b/doc/visual-programming/source/report/index.md deleted file mode 100644 index 1722ef8fab6..00000000000 --- a/doc/visual-programming/source/report/index.md +++ /dev/null @@ -1,17 +0,0 @@ -# Report - -It is possible to compile a report in Orange. We can save the report in .html, .pdf or .report format. Reports allow us to trace back analytical steps as they save the workflow at which each report segment was created. - -Each widget has a report button in the status bar at the bottom. Pressing on the File icon adds a new section to the report. - -![](report-button.png) - -Report can be examined with View - Show report. - -## Simple example - -We built a simple workflow with File and Scatter Plot, adding a section to the report at each step. Widgets report parameters, visualizations, and other settings. Each section includes a comment for extra explanation. - -![](report.png) - -To remove a report section, hover on the section in the list on the left. A Trash and an Orange icon will appear. The trash icon removes the section from the report list. Orange icon loads the workflow as it was at the time of creating the section. This is very handy if a colleague wishes to inspect the results. This option is available only if the report is saved in .report format. 
diff --git a/doc/visual-programming/source/report/report-button.png b/doc/visual-programming/source/report/report-button.png deleted file mode 100644 index c73586728f7..00000000000 Binary files a/doc/visual-programming/source/report/report-button.png and /dev/null differ diff --git a/doc/visual-programming/source/report/report.png b/doc/visual-programming/source/report/report.png deleted file mode 100644 index 20d59846e7e..00000000000 Binary files a/doc/visual-programming/source/report/report.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/aggregatecolumns.md b/doc/visual-programming/source/widgets/data/aggregatecolumns.md deleted file mode 100644 index a04cc9515c6..00000000000 --- a/doc/visual-programming/source/widgets/data/aggregatecolumns.md +++ /dev/null @@ -1,37 +0,0 @@ -Aggregate Columns -================= - -Compute a sum, max, min ... of selected columns. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: extended dataset - -**Aggregate Columns** outputs an aggregation of selected columns, for example a sum, min, max, etc. - -![](images/AggregateColumns.png) - -1. Selected attributes. -2. Operator for aggregation: - - sum - - product - - min - - max - - mean - - variance - - median -3. Set the name of the computed attribute. -4. If *Apply automatically* is ticked, changes will be communicated automatically. Alternatively, click *Apply*. - -Example -------- - -We will use iris data from the [File](../data/file.md) widget for this example and connect it to **Aggregate Columns**. - -Say we wish to compute a sum of *sepal_length* and *sepal_width* attributes. We select the two attributes from the list. 
- -![](images/AggregateColumns-Example.png) diff --git a/doc/visual-programming/source/widgets/data/applydomain.md b/doc/visual-programming/source/widgets/data/applydomain.md deleted file mode 100644 index 15a5fcc517f..00000000000 --- a/doc/visual-programming/source/widgets/data/applydomain.md +++ /dev/null @@ -1,37 +0,0 @@ -Apply Domain -============ - -Given dataset and template transforms the dataset. - -**Inputs** - -- Data: input dataset -- Template Data: template for transforming the dataset - -**Outputs** - -- Transformed Data: transformed dataset - -**Apply Domain** maps new data into a transformed space. For example, if we transform some data with PCA and wish to observe new data in the same space, we can use Apply Domain to map the new data into the PCA space created from the original data. - -![](images/ApplyDomain.png) - -The widget receives a dataset and a template dataset used to transform the dataset. - -Side note --------- - -Domain transformation works by using information from the template data. For example, for PCA, Components are not enough. Transformation requires information on the center of each column, variance (if the data is normalized), and if and how the data was preprocessed (continuized, imputed, etc.). - -Example -------- - -We will use iris data from the [File](../data/file.md) widget for this example. To create two separate data sets, we will use [Select Rows](../data/selectrows.md) and set the condition to *iris is one of iris-setosa, iris-versicolor*. This will output a data set with a 100 rows, half of them belonging to iris-setosa class and the other half to iris-versicolor. - -We will transform the data with [PCA](../unsupervised/PCA.md) and select the first two components, which explain 96% of variance. Now, we would like to apply the same preprocessing on the 'new' data, that is the remaining 50 iris virginicas. Send the unused data from **Select Rows** to **Apply Domain**. 
Make sure to use the *Unmatched Data* output from **Select Rows** widget. Then add the *Transformed data* output from **PCA**. - -**Apply Domain** will apply the preprocessor to the new data and output it. To add the new data to the old data, use [Concatenate](../data/concatenate.md). Use *Transformed Data* output from **PCA** as *Primary Data* and *Transformed Data* from **Apply Domain** as *Additional Data*. - -Observe the results in a [Data Table](../data/datatable.md) or in a [Scatter Plot](../visualize/scatterplot.md) to see the new data in relation to the old one. - -![](images/ApplyDomain-Example.png) diff --git a/doc/visual-programming/source/widgets/data/color.md b/doc/visual-programming/source/widgets/data/color.md deleted file mode 100644 index c6b21ee677a..00000000000 --- a/doc/visual-programming/source/widgets/data/color.md +++ /dev/null @@ -1,46 +0,0 @@ -Color -===== - -Set color legend for variables. - -**Inputs** - -- Data: input data set - -**Outputs** - -- Data: data set with a new color legend - -The **Color** widget sets the color legend for visualizations. - -![](images/Color-stamped.png) - -1. A list of discrete variables. Set the color of each variable by double-clicking on it. The widget also enables renaming variables by clicking on their names. -2. A list of continuous variables. Click on the color strip to choose a different palette. To use the same palette for all variables, change it for one variable and click *Copy to all* that appears on the right. The widget also enables renaming variables by clicking on their names. -3. Produce a report. -4. Apply changes. If *Apply automatically* is ticked, changes will be communicated automatically. Alternatively, just click *Apply*. - -![](images/Color-Continuous_unindexed.png) - -Palettes for numeric variables are grouped and tagged by their properties. - -- Diverging palettes have two colors on its ends and a central color (white or black) in the middle. 
Such palettes are particularly useful when the the values can be positive or negative, as some widgets (for instance the Heat map) will put the 0 at the middle point in the palette. - -- Linear palettes are constructed so that human perception of the color change is linear with the change of the value. - -- Color blind palettes cover different types of color blindness, and can also be linear or diverging. - -- In isoluminant palettes, all colors have equal brightness. - -- Rainbow palettes are particularly nice in widgets that bin numeric values in visualizations. - -Example -------- - -We chose to work with the *heart_disease* data set. We opened the color palette and selected two new colors for diameter narrowing variable. Then we opened the [Scatter Plot](../visualize/scatterplot.md) widget and viewed the changes made to the scatter plot. - -![](images/Color-Example-Discrete.png) - -To see the effect of color palettes for numeric variables, we color the points in the scatter plot by cholesterol and change the palette for this attribute in the Color widget. - -![](images/Color-Example-Continuous.png) diff --git a/doc/visual-programming/source/widgets/data/concatenate.md b/doc/visual-programming/source/widgets/data/concatenate.md deleted file mode 100644 index 903d933a85d..00000000000 --- a/doc/visual-programming/source/widgets/data/concatenate.md +++ /dev/null @@ -1,40 +0,0 @@ -Concatenate -=========== - -Concatenates data from multiple sources. - -**Inputs** - -- Primary Data: data set that defines the attribute set -- Additional Data: additional data set - -**Outputs** - -- Data: concatenated data - -The widget concatenates multiple sets of instances (data sets). The merge is "vertical", in a sense that two sets of 10 and 5 instances yield a new set of 15 instances. - -![](images/Concatenate.png) - -1. 
Set the attribute merging method: - - *all variables that appear in input tables* will output columns from all input tables, assinging missing values for columns that were absent in each table - - *only variables that appear in all tables* will output an intersection of columns from all input tables - -2. When merging multiple tables with the same data but different column names, check *Use column names from the primary table and ignore names in other tables*. The types of variables and names of categories (for categorical variables) must match. - - Check *Treat variables with the same name as the same variable, even if they are computed using different formulae* to consider columns with the same name as the same column in all input tables. Leaving it unchecked will not match by column name but by column identity. - -3. Add the identification of source data sets to the output data set. The option will output an additional column with *Feature name* and a given type, defined in the *Place* option. The default will place source ID as a class variable. The new column will contain the names of the input tables as values. - -4. If *Apply automatically* is ticked, changes are communicated automatically. Otherwise, click *Apply*. - -If one of the tables is connected to the widget as the primary table, the resulting table will contain only the attributes from this table. If there is no primary table, the attributes can be either a union of all attributes that appear in the tables specified as *Additional Tables*, or their intersection, that is, a list of attributes common to all the connected tables. - -Example -------- - -As shown below, the widget can be used for merging data from two separate files. Let's say we have two data sets with the same attributes, one containing instances from the first experiment and the other instances from the second experiment and we wish to join the two data tables together. 
We use the **Concatenate** widget to merge the data sets by attributes (appending new rows under existing attributes). - -Below, we used a modified *Zoo* data set. In the [first](http://file.biolab.si/datasets/zoo-first.tab) [File](../data/file.md) widget, we loaded only the animals beginning with the letters A and B and in the [second](http://file.biolab.si/datasets/zoo-second.tab) one only the animals beginning with the letter C. Upon concatenation, we observe the new data in the [Data Table](../data/datatable.md) widget, where we see the complete table with animals from A to C. - -![](images/Concatenate-Example.png) diff --git a/doc/visual-programming/source/widgets/data/continuize.md b/doc/visual-programming/source/widgets/data/continuize.md deleted file mode 100644 index 0a3d85bc714..00000000000 --- a/doc/visual-programming/source/widgets/data/continuize.md +++ /dev/null @@ -1,69 +0,0 @@ -Continuize -========== - -Turns discrete variables (attributes) into numeric ("continuous") dummy variables. - -**Inputs** - -- Data: input data set - -**Outputs** - -- Data: transformed data set - -The **Continuize** widget receives a data set in the input and outputs the same data set in which some or all categorical variables are replaced with continuous ones and numeric variables are scaled. - -![](images/Continuize-stamped.png) - -1. Select a categorical attribute to define its specific treatmen, or click the "Deafult" option above to set the default treatment for all categorical attributes without specific settings. - - Multiple attributes can be chosen. - -2. Define the treatment of categorical variables. - - Examples in this section will assume that we have a categorical attribute *status* with values *low*, *middle* and *high*, listed in that order. Options for their transformation are: - - - **Use default setting**: use the default treatment. - - - **Leave categorical**: leave the attribute as it is. 
- - - **First value as base**: a N-valued categorical variable will be transformed into N-1 numeric variables, each serving as an indicator for one of the original values except for the base value. The base value is the first value in the list. By default, the values are ordered alphabetically; their order can be changed in [Edit Domain](../data/editdomain). - - In the above case, the three-valued variable *status* is transformed into two numeric variables, *status=middle* with values 0 or 1 indicating whether the original variable had value *middle* on a particular example, and similarly, *status=high*. - - - **Most frequent value as base**: similar to the above, except that the most frequent value is used as a base. So, if the most frequent value in the above example is *middle*, then *middle* is considered as the base and the two newly constructed variables are *status=low* and *status=high*. - - - **One-hot encoding**: this option constructs one numeric variable per each value of the original variable. In the above case, we would get variables *status=low*, *status=middle* and *status=high*. - - - **Remove if more than 3 values**: removes non-binary categorical variables from the data. - - - **Remove**: removes the attribute. - - - **Treat as ordinal**: converts the variable into a single numeric variable enumerating the original values. In the above case, the new variable would have the value of 0 for *low*, 1 for *middle* and 2 for *high*. Again note that the order of values can be set in [Edit Domain](../data/editdomain). - - - **Treat as normalized ordinal**: same as above, except that values are normalized into range 0-1. In our example, the values of the new variable would be 0, 0.5 and 1. - -3. Select attributes to set individual treatments or click "Default" to set the default treatment for numeric attributes. - -4. Define the treatment of numeric attributes. - - - **Use default setting**: use the general default. 
- - **Leave as it is**: do not change anything. - - **Standardize**: subtract the mean and divide by the standard deviation (not available for sparse data). - - **Center**: subtract the mean (not available for sparse data). - - **Scale**: divide by standard deviation. - - **Normalize to interval [-1, 1]**: linearly scale the values into interval [-1, 1] (not available for sparse data) - - **Normalize to interval [0, 1]**: linearly scale the values into interval [0, 1] (not available for sparse data) - -5. If checked, the class attribute is converted in the same fashion as categorical attributes that are treated as ordinal (see above). - -Examples --------- - -First, let's see what is the output of the **Continuize** widget. We feed the original data (the *Heart disease* data set) into the [Data Table](../data/datatable) and see how they look like. Then we continuize the discrete values using various options and observe them in another [Data Table](../data/datatable). - -![](images/Continuize-Example1.png) - -In the second example, we show a typical use of this widget - in order to properly plot the linear projection of the data, discrete attributes need to be converted to continuous ones and that is why we put the data through the **Continuize** widget before drawing it. Gender, for instance, is transformed into two attributes "*gender=female*" and *gender=male*. - -![](images/Continuize-Example2.png) diff --git a/doc/visual-programming/source/widgets/data/correlations.md b/doc/visual-programming/source/widgets/data/correlations.md deleted file mode 100644 index 18263b7eeec..00000000000 --- a/doc/visual-programming/source/widgets/data/correlations.md +++ /dev/null @@ -1,36 +0,0 @@ -Correlations -============ - -Compute all pairwise attribute correlations. 
- -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: input dataset -- Features: selected pair of features -- Correlations: data table with correlation scores - -**Correlations** computes Pearson or Spearman correlation scores for all pairs of features in a dataset. These methods can only detect monotonic relationship. - -![](images/Correlations-stamped.png) - -1. Correlation measure: - - Pairwise [Pearson](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) correlation. - - Pairwise [Spearman](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) correlation. -2. Filter for finding attribute pairs. -3. A list of attribute pairs with correlation coefficient. Press *Finished* to stop computation for large datasets. -4. Access widget help and produce report. - -Example -------- - -Correlations can be computed only for numeric (continuous) features, so we will use *housing* as an example data set. Load it in the [File](file.md) widget and connect it to **Correlations**. Positively correlated feature pairs will be at the top of the list and negatively correlated will be at the bottom. - -![](images/Correlations-links.png) - -Go to the most negatively correlated pair, DIS-NOX. Now connect [Scatter Plot](../visualize/scatterplot.md) to **Correlations** and set two outputs, Data to Data and Features to Features. Observe how the feature pair is immediately set in the scatter plot. Looks like the two features are indeed negatively correlated. - -![](images/Correlations-Example.png) diff --git a/doc/visual-programming/source/widgets/data/createclass.md b/doc/visual-programming/source/widgets/data/createclass.md deleted file mode 100644 index 4bce32ab9c8..00000000000 --- a/doc/visual-programming/source/widgets/data/createclass.md +++ /dev/null @@ -1,38 +0,0 @@ -Create Class -============ - -Create class attribute from a string attribute. 
- -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with a new class variable - -**Create Class** creates a new class attribute from an existing discrete or string attribute. The widget matches the string value of the selected attribute and constructs a new user-defined value for matching instances. - -![](images/CreateClass-stamped.png) - -1. The attribute the new class is constructed from. -2. Matching: - - Name: the name of the new class value - - Substring: regex-defined substring that will match the values from the above-defined attribute - - Instances: the number of instances matching the substring - - Press '+' to add a new class value -3. Name of the new class column. -4. Match only at the beginning will begin matching from the beginning of the string. Case sensitive will match by case, too. -5. Produce a report. -6. Press *Apply* to commit the results. - -Example -------- - -Here is a simple example with the *auto-mpg* dataset. Pass the data to **Create Class**. Select *car_name* as a column to create the new class from. Here, we wish to create new values that match the car brand. First, we type *ford* as the new value for the matching strings. Then we define the substring that will match the data instances. This means that all instances containing *ford* in their *car_name*, will now have a value *ford* in the new class column. Next, we define the same for *honda* and *fiat*. The widget will tell us how many instance are yet unmatched (remaining instances). We will name them *other*, but you can continue creating new values by adding a condition with '+'. - -We named our new class column *car_brand* and we matched at the beginning of the string. - -![](images/CreateClass-example.png) - -Finally, we can observe the new column in a [Data Table](../data/datatable.md) or use the value as color in the [Scatter Plot](../visualize/scatterplot.md). 
diff --git a/doc/visual-programming/source/widgets/data/createinstance.md b/doc/visual-programming/source/widgets/data/createinstance.md deleted file mode 100644 index 16725371f2a..00000000000 --- a/doc/visual-programming/source/widgets/data/createinstance.md +++ /dev/null @@ -1,44 +0,0 @@ -Create Instance -=============== - -Interactively creates an instance from a sample dataset. - -**Inputs** - -- Data: input dataset -- Reference: refrence dataset - -**Outputs** - -- Data: input dataset appended the created instance - -The **Create Instance** widget creates a new instance, based on the input data. The widget displays all variables of the input dataset in a table of two columns. The column *Variable* represents the variable's name, meanwhile the column *Value* enables setting the variable's value. Each value is initially set to median value of the variable. The values can be manually set to *Median*, *Mean*, *Random* or *Input* by clicking the corresponding button. For easier searching through the variables, the table has filter attached. When clicking upon one of the mentioned buttons, only filtered variables are considered. One can also set the value by right-clicking a row and selecting an option in a context menu. - -![](images/CreateInstance-stamped.png) - -1. Filter table by variable name. -2. The column represents a variable's name and type. The table can be sorted by clicking the columns header. -3. Provides controls for value editing. -4. Set filtered variables' values to: - - *Median*: median value of variable in the input dataset - - *Mean*: mean value of variable in the input dataset - - *Random*: random value in a range of variable in the input dataset - - *Input*: median value of variable in the reference dataset -5. If *Append this instance to input data* is ticked, the created instance is appended to the input dataset. Otherwise, a single instance appears on the output. 
To distinguish between created and original data, *Source ID* variable is added. -5. If *Apply automatically* is ticked, changes are committed automatically. Otherwise, you have to press *Apply* after each change. -6. Produce a report. -7. Information on input and reference dataset. -8. Information on output dataset. - -Example -------- - -The **Create Instance** is usually used to examine a model performance on some arbitrary data. The basic usage is shown in the following workflow, where a (*Housing*) dataset is used to fit a [Linear Regression](../model/linearregression.md) model, which is than used to [predict](../evaluate/predictions.md) a target value for data, created by the *Create Instance* widget. Inserting a [Rank](../data/rank.md) widget between [File](../data/file.md) and *Create Instance* enables outputting (and therefore making predictions on) the most important features. -A [Select Column](../data/selectcolumns.md) widget is inserted to omit the actual target value. - -![](images/CreateInstance-example.png) - -The next example shows how to check whether the created instance is some kind of outlier. The creates instance is feed to [PCA](../unsupervised/PCA.md) whose first and second componens are then examined in a [Scatter Plot](../visualize/scatterplot.md). The created instance is colored red in the plot and it could be considered as an outlier if it appears far from the original data (blue). - -![](images/CreateInstance-example2.png) - diff --git a/doc/visual-programming/source/widgets/data/csvfileimport.md b/doc/visual-programming/source/widgets/data/csvfileimport.md deleted file mode 100644 index f9564eab947..00000000000 --- a/doc/visual-programming/source/widgets/data/csvfileimport.md +++ /dev/null @@ -1,65 +0,0 @@ -CSV File Import -=============== - -Import a data table from a CSV formatted file. 
- -**Outputs** - -- Data: dataset from the .csv file -- Data Frame: pandas DataFrame object - -The **CSV File Import** widget reads comma-separated files and sends the dataset to its output channel. File separators can be commas, semicolons, spaces, tabs or manually-defined delimiters. The history of most recently opened files is maintained in the widget. - -*Data Frame* output can be used in the [Python Script](../data/pythonscript.md) widget by connecting it to the `in_object` input (e.g. `df = in_object`). Then it can be used a regular DataFrame. - -### Import Options - -The import window where the user sets the import parameters. Can be re-opened by pressing *Import Options* in the widget. - -Right click on the column name to set the column type. Right click on the row index (on the left) to mark a row as a header, skipped or a normal data row. - -![](images/CSVFileImport-ImportOptions-stamped.png) - -1. File encoding. Default is UTF-8. See Encoding subchapter for details. -2. Import settings: - - *Cell delimiter*: - - Tab - - Comma - - Semicolon - - Space - - Other (set the delimiter in the field to the right) - - *Quote character*: either " or '. Defines what is considered a text. - - *Number separators*: - - Grouping: delimiters for thousands, e.g. 1,000 - - Decimal: delimiters for decimals, e.g. 1.234 -3. Column type: select the column in the preview and set its type. Column type can be set also by right-clicking on the selected column. - - *Auto*: Orange will automatically try to determine column type. (default) - - *Numeric*: for continuous data types, e.g. (1.23, 1.32, 1.42, 1.32) - - *Categorical*: for discrete data types, e.g. (brown, green, blue) - - *Text*: for string data types, e.g. (John, Olivia, Mike, Jane) - - *Datetime*: for time variables, e.g. (1970-01-01) - - *Ignore*: do not output the column. -4. Pressing *Reset* will return the settings to the previously set state (saved by pressing OK in the Import Options dialogue). 
*Restore Defaults* will set the settings to their default values. *Cancel* aborts the import, while *OK* imports the data and saves the settings. - -### Widget - -The widget once the data is successfully imported. - -![](images/CSVFileImport-widget-stamped.png) - -1. The folder icon opens the dialogue for import the local .csv file. It can be used to either load the first file or change the existing file (load new data). The *File* dropdown stores paths to previously loaded data sets. -2. Information on the imported data set. Reports on the number of instances (rows), variables (features or columns) and meta variables (special columns). -3. *Import Options* re-opens the import dialogue where the user can set delimiters, encodings, text fields and so on. *Cancel* aborts data import. *Reload* imports the file once again, adding to the data any changes made in the original file. - -### Encoding - -The dialogue for settings custom encodings list in the Import Options - Encoding dropdown. Select *Customize Encodings List...* to change which encodings appear in the list. To save the changes, simply close the dialogue. Closing and reopening Orange (even with Reset widget settings) will not re-set the list. To do this, press *Restore Defaults*. To have all the available encodings in the list, press *Select all*. - -![](images/CSVFileImport-encodings.png) - -Example -------- - -**CSV File Import** works almost exactly like the [File](../data/file.md) widget, with the added options for importing different types of .csv files. In this workflow, the widget read the data from the file and sends it to the [Data Table](../data/datatable.md) for inspection. 
- -![](images/CSVFileImport-Example.png) diff --git a/doc/visual-programming/source/widgets/data/datainfo.md b/doc/visual-programming/source/widgets/data/datainfo.md deleted file mode 100644 index 669cbcab82c..00000000000 --- a/doc/visual-programming/source/widgets/data/datainfo.md +++ /dev/null @@ -1,27 +0,0 @@ -Data Info -========= - -Displays information on a selected dataset. - -**Inputs** - -- Data: input dataset - -A simple widget that presents information on dataset size, features, -targets, meta attributes, and location. - -![](images/data-info-stamped.png) - -1. Information on dataset size -2. Information on discrete and continuous features -3. Information on targets -4. Information on meta attributes -5. Information on where the data is stored -6. Produce a report. - -Example -------- - -Below, we compare the basic statistics of two **Data Info** widgets - one with information on the entire dataset and the other with information on the (manually) selected subset from the [Scatter Plot](../visualize/scatterplot.md) widget. We used the *Iris* dataset. - -![](images/DataInfo-Example.png) diff --git a/doc/visual-programming/source/widgets/data/datasampler.md b/doc/visual-programming/source/widgets/data/datasampler.md deleted file mode 100644 index 533ba5f8ada..00000000000 --- a/doc/visual-programming/source/widgets/data/datasampler.md +++ /dev/null @@ -1,54 +0,0 @@ -Data Sampler -============ - -Selects a subset of data instances from an input dataset. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data Sample: sampled data instances -- Remaining Data: out-of-sample data - -The **Data Sampler** widget implements several data sampling methods. It outputs a sampled and a complementary dataset (with instances from the input set that are not included in the sampled dataset). The output is processed after the input dataset is provided and *Sample Data* is pressed. - -![](images/DataSampler-stamped.png) - -1. Information on the input and output dataset. 
-2. The desired sampling method: - - **Fixed proportion of data** returns a selected percentage of the entire data (e.g. 70% of all the data) - - **Fixed sample size** returns a selected number of data instances with a chance to set *Sample with replacement*, which always samples from the entire dataset (does not subtract instances already in the subset). With replacement, you can generate more instances than available in the input dataset. - - [Cross Validation](https://en.wikipedia.org/wiki/Cross-validation_(statistics)) partitions data instances into the specified number of complementary subsets. Following a typical validation schema, all subsets except the one selected by the user are output as Data Sample, and the selected subset goes to Remaining Data. (Note: In older versions, the outputs were swapped. If the widget is loaded from an older workflow, it switches to compatibility mode.) - - [Bootstrap](https://en.wikipedia.org/wiki/Bootstrapping_(statistics)) infers the sample from the population statistic. -3. *Replicable sampling* maintains sampling patterns that can be carried - across users, while *stratify sample* mimics the composition of the - input dataset. -4. Press *Sample Data* to output the data sample. - -If all data instances are selected (by setting the proportion to 100 % or setting the fixed sample size to the entire data size), output instances are still shuffled. - -Examples --------- - -First, let's see how the **Data Sampler** works. We will use the *iris* data from the [File](../data/file.md) widget. We see there are 150 instances in the data. We sampled the data with the **Data Sampler** widget and we chose to go with a fixed sample size of 5 instances for simplicity. We can observe the sampled data in the [Data Table](../data/datatable.md) widget (Data Table (in-sample)). The second [Data Table](../data/datatable.md) (Data Table (out-of-sample)) shows the remaining 145 instances that weren't in the sample. 
To output the out-of-sample data, double-click the connection between the widgets and rewire the output to *Remaining Data --> Data*. - -![](images/DataSampler-Example1.png) - -Now, we will use the **Data Sampler** to split the data into training and testing part. We are using the *iris* data, which we loaded with the [File](../data/file.md) widget. In **Data Sampler**, we split the data with *Fixed proportion of data*, keeping 70% of data instances in the sample. - -Then we connected two outputs to the [Test & Score](../evaluate/testandscore.md) widget, *Data Sample --> Data* and *Remaining Data --> Test Data*. Finally, we added [Logistic Regression](../model/logisticregression.md) as the learner. This runs logistic regression on the Data input and evaluates the results on the Test Data. - -![](images/DataSampler-Example2.png) - -Over/Undersampling ------------------- - -**Data Sampler** can also be used to oversample a minority class or undersample majority class in the data. Let us show an example for oversampling. First, separate the minority class using a [Select Rows](../data/selectrows.md) widget. We are using the *iris* data from the [File](../data/file.md) widget. The data set has 150 data instances, 50 of each class. Let us oversample, say, *iris-setosa*. - -In **Select Rows**, set the condition to *iris is iris-setosa*. This will output 50 instances of the *iris-setosa* class. Now, connect *Matching Data* into the **Data Sampler**, select *Fixed sample size*, set it to, say, 100 and select *Sample with replacement*. Upon pressing *Sample Data*, the widget will output 100 instances of *iris-setosa* class, some of which will be duplicated (because we used *Sample with replacement*). - -Finally, use [Concatenate](../data/concatenate) to join the oversampled instances and the *Unmatched Data* output of the **Select Rows** widget. This outputs a data set with 200 instances. We can observe the final results in the [Distributions](../visualize/distributions). 
- -![](images/DataSampler-Example-OverUnderSampling.png) diff --git a/doc/visual-programming/source/widgets/data/datasets.md b/doc/visual-programming/source/widgets/data/datasets.md deleted file mode 100644 index 9943af26d30..00000000000 --- a/doc/visual-programming/source/widgets/data/datasets.md +++ /dev/null @@ -1,24 +0,0 @@ -Datasets -======== - -Load a dataset from an online repository. - -**Outputs** - -- Data: output dataset - -**Datasets** widget retrieves selected dataset from the server and sends it to the output. File is downloaded to the local memory and thus instantly available even without the internet connection. Each dataset is provided with a description and information on the data size, number of instances, number of variables, target and tags. - -![](images/Datasets-stamped.png) - -1. Information on the number of datasets available and the number of them downloaded to the local memory. -2. Content of available datasets. Each dataset is described with the size, number of instances and variables, type of the target variable and tags. -3. Formal description of the selected dataset. -4. If *Send Data Automatically* is ticked, selected dataset is communicated automatically. Alternatively, press *Send Data*. - -Example -------- - -Orange workflows can start with **Datasets** widget instead of **File** widget. In the example below, the widget retrieves a dataset from an online repository (Kickstarter data), which is subsequently sent to both the [Data Table](../data/datatable) and the [Distributions](../visualize/distributions). - -![](images/Datasets-Workflow.png) diff --git a/doc/visual-programming/source/widgets/data/datatable.md b/doc/visual-programming/source/widgets/data/datatable.md deleted file mode 100644 index 7baca5e96aa..00000000000 --- a/doc/visual-programming/source/widgets/data/datatable.md +++ /dev/null @@ -1,41 +0,0 @@ -Data Table -========== - -Displays attribute-value data in a spreadsheet. 
- -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the table - -The **Data Table** widget receives one or more datasets in its input and presents them as a spreadsheet. Data instances may be sorted by attribute values. The widget also supports manual selection of data instances. - -![](images/DataTable-stamped.png) - -1. The name of the dataset (usually the input data file). Data - instances are in rows and their attribute values in columns. In this - example, the dataset is sorted by the attribute "sepal length". -2. Info on current dataset size and number and types of attributes -3. Values of continuous attributes can be visualized with bars; colors - can be attributed to different classes. -4. Data instances (rows) can be selected and sent to the widget's output - channel. -5. Use the *Restore Original Order* button to reorder data instances after - attribute-based sorting. -6. Produce a report. -7. While auto-send is on, all changes will be automatically communicated - to other widgets. Otherwise, press *Send Selected Rows*. - -Example -------- - -We used two [File](../data/file.md) widgets to read the *Iris* and *Glass* dataset (provided in Orange distribution), and send them to the **Data Table** widget. - -![](images/DataTable-Schema.png) - -Selected data instances in the first **Data Table** are passed to the second **Data Table**. Notice that we can select which dataset to view (iris or glass). Changing from one dataset to another alters the communicated selection of data instances if *Commit on any change* is selected. - -![](images/DataTable-Example.png) diff --git a/doc/visual-programming/source/widgets/data/discretize.md b/doc/visual-programming/source/widgets/data/discretize.md deleted file mode 100644 index d58557bf292..00000000000 --- a/doc/visual-programming/source/widgets/data/discretize.md +++ /dev/null @@ -1,46 +0,0 @@ -Discretize -========== - -Converts numeric attributes to categorical. 
- -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with discretized values - -The **Discretize** widget [discretizes](https://en.wikipedia.org/wiki/Discretization) numeric variables. - -![](images/Discretize.png) - -1. Set default method for discretization. - -2. Select variables to set specific discretization methods for each. Hovering over a variable shows intervals. - -3. Discretization methods - - - **Keep numeric** keeps the variable as it is. - - **Remove** removes variable. - - **Natural binning** finds nice thresholds for the variable's range of values, for instance 10, 20, 30 or 0.2, 0.4, 0.6, 0.8. We can set the desired number of bins; the actual number will depend on the interval. - - **Fixed width** uses a user-defined bin width. Boundaries of bins will be multiples of width. For instance, if the width is 10 and the variable's values range from 35 to 68, the resulting bins will be <40, 40-50, 50-60, >60. This method does not work for time variables. If the width is too large (resulting in a single interval) or too small (resulting in more than 100 intervals), the variable is removed. - - **Time interval** is similar to Fixed width, but for time variables. We specify the width and a time unit, e.g. 4 months or 3 days. Bin boundaries will be multiples of the interval; e.g. with 4 months, bins will always include Jan-Mar, Apr-Jun, Jul-Sep and Oct-Dec. - - **[Equal-frequency](http://www.saedsayad.com/unsupervised_binning.htm)** splits the attribute into a given number of intervals with approximately the same number of instances. - - [Equal-width](https://en.wikipedia.org/wiki/Data_binning) evenly splits the range between the smallest and the largest observed value. 
- - [Entropy-MDL](http://ijcai.org/Past%20Proceedings/IJCAI-93-VOL2/PDF/022.pdf) is a top-down discretization invented by Fayyad and Irani, which recursively splits the attribute at a cut maximizing information gain, until the gain is lower than the minimal description length of the cut. This discretization can result in an arbitrary number of intervals, including a single interval, in which case the variable is discarded as useless (removed). - - **Custom** allows entering an increasing, comma-separated list of thresholds. This is not applicable to time variables. - - **Use default setting** (enabled for particular settings and not default) sets the method to specified as "Default setting". - -4. The CC button sets the method for the currently selected variables to Custom, using their current thresholds. This allows for manual editing of automatically determined bins. - -Example -------- - -In the schema below, we took the *Heart disease* data set and -- discretized *age* to a fixed interval of 10 (years), -- *max HR* to approximately 6 bins (the closest match were 7 bins with a width of 25), -- removed *Cholesterol*, -- and used *entropy-mdl* for the remaining variables, which resulted in removing *rest SBP* and in two intervals for *ST by exercise* and *major vessels colored*. - -![](images/Discretize-Example.png) diff --git a/doc/visual-programming/source/widgets/data/editdomain.md b/doc/visual-programming/source/widgets/data/editdomain.md deleted file mode 100644 index 7f501ddb16c..00000000000 --- a/doc/visual-programming/source/widgets/data/editdomain.md +++ /dev/null @@ -1,44 +0,0 @@ -Edit Domain -=========== - -Rename features and their values. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with edited domain - -This widget can be used to edit/change a dataset's domain - rename features, rename or merge values of categorical features, add a categorical value, and assign labels. - -![](images/EditDomain-stamped.png) - -1. 
All features (including meta attributes) from the input dataset are listed in the *Variables* list. Selecting one feature displays an editor on the right. -2. Editing options: - - Change the name of the feature. - - Change the type of the feature. For example, convert a string variable to categorical. - - *Unlink variable from its source variable*. This option removes existing computation for a variable (say for Cluster how clustering was computed), making it 'plain'. This enables merging variables with same names in [Merge Data](../data/mergedata.md). - - Change the value names for discrete features in the *Values* list box. Double-click to edit the name. - - Add, remove or edit additional feature annotations in the *Labels* box. Add a new label with the + button and add the *Key* and *Value* for the new entry. Key will be displayed in the top left corner of the [Data Table](../data/datatable.md), while values will appear below the specified column. Remove an existing label with the - button. -3. Reorder or merge values of categorical features. To reorder the values (for example, to display them in [Distributions](../visualize/distributions.md)), use the up and down keys at the bottom of the box. To add or remove a value, use + and - buttons. Select two or more values and click = to merge them into a single value. Use the M button to merge values on condition. -4. Rename the output table. Useful for displaying table names in [Venn Diagram](../visualize/venndiagram.md). -5. To revert the changes made to the selected feature, press the *Reset Selected* button while the feature is selected in the *Variables* list. Pressing *Reset All* will remove all the changes to the domain. Press *Apply* to send the new domain to the output. - -**Merging options** - -![](images/EditDomain-merge.png) - -- *Group selected values*: selected categorical values become a single value. 
-- *Group values with less than N occurrences*: values which appear less than N times in the data, will be grouped into a single value. -- *Group values with less than % occurrences*: values which appear less than X % of the time in the data, will be grouped into a single value. -- *Group all except N most frequent values*: all values but the N most frequent will be grouped into a single value. -- *New value name*: the name of the grouped value. - -Example -------- - -Below, we demonstrate how to simply edit an existing domain. We selected the *heart_disease.tab* dataset and edited the *gender* attribute. Where in the original we had the values *female* and *male*, we changed it into *F* for female and *M* for male. Then we used the down key to switch the order of the variables. Finally, we added a label to mark that the attribute is binary. We can observe the edited data in the [Data Table](../data/datatable.md) widget. - -![](images/EditDomain-Example.png) diff --git a/doc/visual-programming/source/widgets/data/featurestatistics.md b/doc/visual-programming/source/widgets/data/featurestatistics.md deleted file mode 100644 index 18932070fc1..00000000000 --- a/doc/visual-programming/source/widgets/data/featurestatistics.md +++ /dev/null @@ -1,40 +0,0 @@ -Feature Statistics -================== - -Show basic statistics for data features. - -**Inputs** - -- Data: input data - -**Outputs** - -- Reduced data: table containing only selected features -- Statistics: table containing statistics of the selected features - -The **Feature Statistics** widget provides a quick way to inspect and find interesting features in a given data set. - -![](images/feature_statistics-stamped.png) - -The Feature Statistics widget on the *heart-disease* data set. - -1. The feature type - can be categorical, numeric, time and string. -2. The name of the feature. -3. The histogram showing the distribution of feature's values. Values of numeric features are split into bins. -4. 
Further columns show different statistics. Mean, minimal and maximal value are computed only for numeric features. Mode shows the most common value for numeric or categorical feature. Dispersion shows [coefficient of variation](https://en.wikipedia.org/wiki/Coefficient_of_variation) for numeric features, and [entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) for categorical. -5. The bars in the histogram can be further split by value of another variable. The default choice is the target variable, but the user can change this to an arbitrary feature or none. - -Example -------- - -The Feature Statistics widget is most often used after the [File](../data/file.md) widget to inspect and find potentially interesting features in the given data set. In the following examples, we use the *heart-disease* data set. - -![](images/feature_statistics_workflow.png) - -Once we have found a subset of potentially interesting features, or we have found features that we would like to exclude, we can simply select the features we want to keep. The widget outputs a new data set with only these features. - -![](images/feature_statistics_example1.png) - -Alternatively, if we want to store feature statistics, we can use the *Statistics* output and manipulate those values as needed. In this example, we display the statistics in a table. - -![](images/feature_statistics_example2.png) diff --git a/doc/visual-programming/source/widgets/data/file.md b/doc/visual-programming/source/widgets/data/file.md deleted file mode 100644 index 1bf9dbc4392..00000000000 --- a/doc/visual-programming/source/widgets/data/file.md +++ /dev/null @@ -1,53 +0,0 @@ - -File -==== - -Reads attribute-value data from an input file. - -**Outputs** - -- Data: dataset from the file - -The **File** widget [reads the input data file](../../loading-your-data/index.md) (data table with data instances) and sends the dataset to its output channel. 
The history of most recently opened files is maintained in the widget. The widget also includes a directory with sample datasets that come pre-installed with Orange. - -The widget reads data from Excel (**.xlsx**), simple tab-delimited (**.txt**), comma-separated files (**.csv**) or URLs. For other formats see Other Formats section below. - -![](images/File-stamped.png) - -1. Browse through previously opened data files, or load any of the sample ones. -2. Browse for a data file. -3. Reloads currently selected data file. -4. Insert data from URL addresses, including data from Google Sheets. -5. Information on the loaded dataset: dataset size, number and types of data features. -6. Additional information on the features in the dataset. Features can be edited by double-clicking on them. The user can change the attribute names, select the type of variable per each attribute (*Continuous*, *Nominal*, *String*, *Datetime*), and choose how to further define the attributes (as *Features*, *Targets* or *Meta*). The user can also decide to ignore an attribute. -7. Browse documentation datasets. -8. Produce a report. - -Example -------- - -Most Orange workflows would probably start with the **File** widget. In the schema below, the widget is used to read the data that is sent to both the [Data Table](../data/datatable.md) and the [Box Plot](../visualize/boxplot.md) widget. - -![](images/File-Workflow.png) - -### Loading your data - -- Orange can import any comma, .xlsx or tab-delimited data file or URL. Use the **File** widget and then, if needed, select class and meta attributes. -- To specify the domain and the type of the attribute, attribute names can be preceded with a label followed by a hash. Use c for class and m for meta attribute, i to ignore a column, and C, D, S for continuous, discrete and string attribute types. Examples: C#mpg, mS#name, i#dummy. -- Orange's native format is a tab-delimited text file with three header rows. 
The first row contains attribute names, the second the type (*continuous*, *discrete* or *string*), and the third the optional element (*class*, *meta* or *time*). - -![](images/spreadsheet-simple-head1.png) - -Read more on loading your data [here](../../loading-your-data/index.md). - -### Other Formats - -Supported formats and the widgets to load them: - -- distance matrix: [Distance File](../unsupervised/distancefile.md) -- predictive model: [Load Model](../model/loadmodel.md) -- network: Network File from Network add-on -- images: Import Images from Image Analytics add-on -- text/corpus: Corpus or Import Documents from Text add-on -- single cell data: Load Data from Single Cell add-on -- several spectroscopy files: Multifile from Spectroscopy add-on diff --git a/doc/visual-programming/source/widgets/data/formula.md b/doc/visual-programming/source/widgets/data/formula.md deleted file mode 100644 index 3f48776ed0e..00000000000 --- a/doc/visual-programming/source/widgets/data/formula.md +++ /dev/null @@ -1,58 +0,0 @@ -Formula -======= - -Add new features to your dataset. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with additional features - -**Formula** allows computing new columns by combining the existing ones with a user-defined expression. The resulting column can be categorical, numerical or textual. - -For numeric variables, it suffices to provide a name and an expression. - -![](images/feature-constructor1-stamped.png) - -1. List of constructed variables -2. Add or remove variables -3. New feature name -4. Expression in Python -5. Select a feature -6. Select a function -7. Produce a report -8. Press *Send* to communicate changes - -The following example shows construction of a categorical variable: its value is "lower" if "sepal length" is below 6, "mid" if it is at least 6 but below 7, and "higher" otherwise. Note that spaces need to be replaced by underscores (`sepal_length`). 
- -![](images/feature-constructor2-stamped.png) - -1. List of variable definitions -2. Add or remove variables -3. New feature name -4. Expression in Python -5. If checked, the feature is put among meta attributes -6. Select a feature to use in expression -7. Select a function to use in expression -8. Optional list of values, used to define their order -9. Press *Send* to compute and output data - -Hints ------ - -If you are unfamiliar with Python math language, here's a quick introduction. - -Expressions can use the following operators: -- `+`, `-`, `*`, `/`: addition, subtraction, multiplication, division -- `//`: integer division -- `%`: remainder after integer division -- `**`: exponentiation (for square root, raise to the power of 0.5) -- `<`, `>`, `<=`, `>=` less than, greater than, less or equal, greater or equal -- `==` equal -- `!=` not equal -- if-else: *value* `if` *condition* `else` *other-value* (see the above example) - -See more [here](http://www.tutorialspoint.com/python/python_basic_operators.htm). diff --git a/doc/visual-programming/source/widgets/data/groupby.md b/doc/visual-programming/source/widgets/data/groupby.md deleted file mode 100644 index 0aa10789372..00000000000 --- a/doc/visual-programming/source/widgets/data/groupby.md +++ /dev/null @@ -1,39 +0,0 @@ -Group by -======== - -Groups data by selected variables and aggregates columns with selected aggregations. - -**Inputs** - -- Data: input data table - -**Outputs** - -- Data: aggregated data - -Group By widget first identifies groups based on selected variables in the **Group by** list. Groups are defined by all distinct combinations of values in selected variables. - -In the second step, the widget computes aggregations defined in the table on the right side of the widget for each group. - - -![](images/Group-by-stamped.png) - -1. Select variables that define groups -2. View variables and their aggregations. To change aggregation for one or more variables, select them in the table. -3. 
Change aggregations for variables selected in the view above. -4. When the *Send automatically* box is ticked, all changes will be automatically communicated to other widgets. -5. Get documentation, observe a number of items on input or output - -Examples --------- - -We first load **heart_disease** dataset in the **File** widget. In the **Group By** widget, we set variables that define groups -- **diameter narrowing** and **gender**. Each group includes items (rows) that belong to one combination of both variables. - -In the table on the right-hand side of the widget, we set that we want to compute **mean** and **median** for values of **rest SBP** variable in each group, **median** for values of **cholesterol** variable, and **mean** for **major vessels colored**. - -In the **Data Table** widget, we can see that both females and males have lower average values for **rest SBP** when **diameter narrowing** is 0. The difference is greater for females. The median of **rest SBP** is different only for females, while for males it is the same. - -You can also observe differences between median **cholesterol** level and mean value of **major vessels colored** between groups. 
- - -![](images/Group-by-example.png) diff --git a/doc/visual-programming/source/widgets/data/icons/color.png b/doc/visual-programming/source/widgets/data/icons/color.png deleted file mode 100644 index 192f39716eb..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/color.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/concatenate.png b/doc/visual-programming/source/widgets/data/icons/concatenate.png deleted file mode 100644 index f24562ed627..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/concatenate.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/continuize.png b/doc/visual-programming/source/widgets/data/icons/continuize.png deleted file mode 100644 index ffc3437818a..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/continuize.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/correlations.png b/doc/visual-programming/source/widgets/data/icons/correlations.png deleted file mode 100644 index 2c54c163ea4..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/correlations.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/create-class.png b/doc/visual-programming/source/widgets/data/icons/create-class.png deleted file mode 100755 index f2972f7eb03..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/create-class.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/csvfileimport.png b/doc/visual-programming/source/widgets/data/icons/csvfileimport.png deleted file mode 100644 index 4ed280ef10b..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/csvfileimport.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/data-info.png b/doc/visual-programming/source/widgets/data/icons/data-info.png deleted file mode 
100644 index 826df8baedd..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/data-info.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/data-sampler.png b/doc/visual-programming/source/widgets/data/icons/data-sampler.png deleted file mode 100644 index 7a4f7bf1dc5..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/data-sampler.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/data-table.png b/doc/visual-programming/source/widgets/data/icons/data-table.png deleted file mode 100644 index e3bbdcc63a8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/data-table.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/datasets.png b/doc/visual-programming/source/widgets/data/icons/datasets.png deleted file mode 100755 index f093bce6ba3..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/datasets.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/discretize.png b/doc/visual-programming/source/widgets/data/icons/discretize.png deleted file mode 100644 index 3a9ac1b6bb2..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/discretize.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/edit-domain.png b/doc/visual-programming/source/widgets/data/icons/edit-domain.png deleted file mode 100644 index e8a4a2f1701..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/edit-domain.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/feature-constructor.png b/doc/visual-programming/source/widgets/data/icons/feature-constructor.png deleted file mode 100644 index 5d1770d544f..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/feature-constructor.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/data/icons/file.png b/doc/visual-programming/source/widgets/data/icons/file.png deleted file mode 100644 index 269ff71b388..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/file.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/impute.png b/doc/visual-programming/source/widgets/data/icons/impute.png deleted file mode 100644 index 483acb25bcd..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/impute.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/merge-data.png b/doc/visual-programming/source/widgets/data/icons/merge-data.png deleted file mode 100644 index ef386f8f47d..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/merge-data.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/neighbors.png b/doc/visual-programming/source/widgets/data/icons/neighbors.png deleted file mode 100644 index 69d1ab1d0ba..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/neighbors.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/outliers.png b/doc/visual-programming/source/widgets/data/icons/outliers.png deleted file mode 100644 index 864355c7a6e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/outliers.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/paint-data.png b/doc/visual-programming/source/widgets/data/icons/paint-data.png deleted file mode 100644 index f50ab440806..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/paint-data.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/pivot.png b/doc/visual-programming/source/widgets/data/icons/pivot.png deleted file mode 100644 index 3249deee4f1..00000000000 Binary files 
a/doc/visual-programming/source/widgets/data/icons/pivot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/preprocess.png b/doc/visual-programming/source/widgets/data/icons/preprocess.png deleted file mode 100644 index da4355ba09c..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/preprocess.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/purge-domain.png b/doc/visual-programming/source/widgets/data/icons/purge-domain.png deleted file mode 100644 index 88df4d94ed8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/purge-domain.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/python-script.png b/doc/visual-programming/source/widgets/data/icons/python-script.png deleted file mode 100644 index 89980ef4d34..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/python-script.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/randomize.png b/doc/visual-programming/source/widgets/data/icons/randomize.png deleted file mode 100755 index 6f4e461b62e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/randomize.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/rank.png b/doc/visual-programming/source/widgets/data/icons/rank.png deleted file mode 100644 index 4a6512235c5..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/rank.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/save.png b/doc/visual-programming/source/widgets/data/icons/save.png deleted file mode 100644 index ad8bb786168..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/save.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/select-by-data-index.png 
b/doc/visual-programming/source/widgets/data/icons/select-by-data-index.png deleted file mode 100644 index 8159dcf0891..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/select-by-data-index.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/select-columns.png b/doc/visual-programming/source/widgets/data/icons/select-columns.png deleted file mode 100644 index 461dc450d47..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/select-columns.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/select-rows.png b/doc/visual-programming/source/widgets/data/icons/select-rows.png deleted file mode 100644 index 523873a9371..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/select-rows.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/sql-table.png b/doc/visual-programming/source/widgets/data/icons/sql-table.png deleted file mode 100644 index 109d6910296..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/sql-table.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/icons/transpose.png b/doc/visual-programming/source/widgets/data/icons/transpose.png deleted file mode 100644 index f4e37b5192a..00000000000 Binary files a/doc/visual-programming/source/widgets/data/icons/transpose.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/AggregateColumns-Example.png b/doc/visual-programming/source/widgets/data/images/AggregateColumns-Example.png deleted file mode 100644 index 28856d98818..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/AggregateColumns-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/AggregateColumns.png b/doc/visual-programming/source/widgets/data/images/AggregateColumns.png deleted file mode 100644 index 
9abe1b9451c..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/AggregateColumns.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/ApplyDomain-Example.png b/doc/visual-programming/source/widgets/data/images/ApplyDomain-Example.png deleted file mode 100644 index b6d6ce4751f..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/ApplyDomain-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/ApplyDomain.png b/doc/visual-programming/source/widgets/data/images/ApplyDomain.png deleted file mode 100644 index 5f6596ffb25..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/ApplyDomain.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CSVFileImport-Example.png b/doc/visual-programming/source/widgets/data/images/CSVFileImport-Example.png deleted file mode 100644 index c2dc4ae5816..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CSVFileImport-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CSVFileImport-ImportOptions-stamped.png b/doc/visual-programming/source/widgets/data/images/CSVFileImport-ImportOptions-stamped.png deleted file mode 100644 index e7d6a6fb4cd..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CSVFileImport-ImportOptions-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CSVFileImport-encodings.png b/doc/visual-programming/source/widgets/data/images/CSVFileImport-encodings.png deleted file mode 100644 index 2b30fa5a186..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CSVFileImport-encodings.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CSVFileImport-widget-stamped.png b/doc/visual-programming/source/widgets/data/images/CSVFileImport-widget-stamped.png 
deleted file mode 100644 index 0ea67c50ab8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CSVFileImport-widget-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Color-Continuous_unindexed.png b/doc/visual-programming/source/widgets/data/images/Color-Continuous_unindexed.png deleted file mode 100644 index 21dc6fa04d9..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Color-Continuous_unindexed.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Color-Example-Continuous.png b/doc/visual-programming/source/widgets/data/images/Color-Example-Continuous.png deleted file mode 100644 index d4aeec31ef8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Color-Example-Continuous.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Color-Example-Discrete.png b/doc/visual-programming/source/widgets/data/images/Color-Example-Discrete.png deleted file mode 100644 index a68a69db838..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Color-Example-Discrete.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Color-stamped.png b/doc/visual-programming/source/widgets/data/images/Color-stamped.png deleted file mode 100644 index 295c8be7b9e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Color-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Concatenate-Example.png b/doc/visual-programming/source/widgets/data/images/Concatenate-Example.png deleted file mode 100644 index c0740e1faaf..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Concatenate-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Concatenate.png b/doc/visual-programming/source/widgets/data/images/Concatenate.png 
deleted file mode 100644 index 41ccacbea87..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Concatenate.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Continuize-Example1.png b/doc/visual-programming/source/widgets/data/images/Continuize-Example1.png deleted file mode 100644 index cbd915b5f87..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Continuize-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Continuize-Example2.png b/doc/visual-programming/source/widgets/data/images/Continuize-Example2.png deleted file mode 100644 index 5861e307fe7..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Continuize-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Continuize-stamped.png b/doc/visual-programming/source/widgets/data/images/Continuize-stamped.png deleted file mode 100644 index 3bd67ffda14..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Continuize-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Correlations-Example.png b/doc/visual-programming/source/widgets/data/images/Correlations-Example.png deleted file mode 100644 index 7babef0cb28..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Correlations-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Correlations-links.png b/doc/visual-programming/source/widgets/data/images/Correlations-links.png deleted file mode 100644 index cce0c9fcb04..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Correlations-links.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Correlations-stamped.png b/doc/visual-programming/source/widgets/data/images/Correlations-stamped.png deleted file mode 100644 index 
30cc12b598e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Correlations-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CreateClass-example.png b/doc/visual-programming/source/widgets/data/images/CreateClass-example.png deleted file mode 100644 index ff41b5367cb..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CreateClass-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CreateClass-stamped.png b/doc/visual-programming/source/widgets/data/images/CreateClass-stamped.png deleted file mode 100644 index 734652ce7a2..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CreateClass-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CreateInstance-example.png b/doc/visual-programming/source/widgets/data/images/CreateInstance-example.png deleted file mode 100644 index 5b78b9d0729..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CreateInstance-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CreateInstance-example2.png b/doc/visual-programming/source/widgets/data/images/CreateInstance-example2.png deleted file mode 100644 index 164f2a66276..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CreateInstance-example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/CreateInstance-stamped.png b/doc/visual-programming/source/widgets/data/images/CreateInstance-stamped.png deleted file mode 100644 index c39e7985813..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/CreateInstance-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataInfo-Example.png b/doc/visual-programming/source/widgets/data/images/DataInfo-Example.png deleted file mode 100644 index 
02bd55366b0..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataInfo-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataSampler-Example-OverUnderSampling.png b/doc/visual-programming/source/widgets/data/images/DataSampler-Example-OverUnderSampling.png deleted file mode 100644 index 6bce502c095..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataSampler-Example-OverUnderSampling.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataSampler-Example1.png b/doc/visual-programming/source/widgets/data/images/DataSampler-Example1.png deleted file mode 100644 index ce84863eeee..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataSampler-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataSampler-Example2.png b/doc/visual-programming/source/widgets/data/images/DataSampler-Example2.png deleted file mode 100644 index 0fefceeea69..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataSampler-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataSampler-stamped.png b/doc/visual-programming/source/widgets/data/images/DataSampler-stamped.png deleted file mode 100644 index 8b22141ab8a..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataSampler-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataTable-Example.png b/doc/visual-programming/source/widgets/data/images/DataTable-Example.png deleted file mode 100644 index e3aed14b44e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataTable-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataTable-Schema.png b/doc/visual-programming/source/widgets/data/images/DataTable-Schema.png deleted 
file mode 100644 index 8f5d85ae4d8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataTable-Schema.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/DataTable-stamped.png b/doc/visual-programming/source/widgets/data/images/DataTable-stamped.png deleted file mode 100644 index 580d94877b1..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/DataTable-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Datasets-Workflow.png b/doc/visual-programming/source/widgets/data/images/Datasets-Workflow.png deleted file mode 100644 index b79878f7987..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Datasets-Workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Datasets-stamped.png b/doc/visual-programming/source/widgets/data/images/Datasets-stamped.png deleted file mode 100644 index 20025ddba9d..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Datasets-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Discretize-Example.png b/doc/visual-programming/source/widgets/data/images/Discretize-Example.png deleted file mode 100644 index 3db56a32d89..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Discretize-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Discretize.png b/doc/visual-programming/source/widgets/data/images/Discretize.png deleted file mode 100644 index 11f80cf05c8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Discretize.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/EditDomain-Example.png b/doc/visual-programming/source/widgets/data/images/EditDomain-Example.png deleted file mode 100644 index 636d6572518..00000000000 Binary files 
a/doc/visual-programming/source/widgets/data/images/EditDomain-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/EditDomain-merge.png b/doc/visual-programming/source/widgets/data/images/EditDomain-merge.png deleted file mode 100644 index 4440a5d3f4a..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/EditDomain-merge.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/EditDomain-stamped.png b/doc/visual-programming/source/widgets/data/images/EditDomain-stamped.png deleted file mode 100644 index 295ebfe92d1..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/EditDomain-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/File-Workflow.png b/doc/visual-programming/source/widgets/data/images/File-Workflow.png deleted file mode 100644 index 70c5905448b..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/File-Workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/File-stamped.png b/doc/visual-programming/source/widgets/data/images/File-stamped.png deleted file mode 100644 index 9af594dce83..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/File-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/File.png b/doc/visual-programming/source/widgets/data/images/File.png deleted file mode 100644 index f8a375d03a9..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/File.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Group-by-example.png b/doc/visual-programming/source/widgets/data/images/Group-by-example.png deleted file mode 100644 index 3050bb47636..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Group-by-example.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/data/images/Group-by-stamped.png b/doc/visual-programming/source/widgets/data/images/Group-by-stamped.png deleted file mode 100644 index 47f0d20377e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Group-by-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Impute-Example.png b/doc/visual-programming/source/widgets/data/images/Impute-Example.png deleted file mode 100644 index fc6aa01bea3..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Impute-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Impute.png b/doc/visual-programming/source/widgets/data/images/Impute.png deleted file mode 100644 index 6db0b1c660f..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Impute.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Melt-Default-stamped.png b/doc/visual-programming/source/widgets/data/images/Melt-Default-stamped.png deleted file mode 100644 index 58f1e0e0b3b..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Melt-Default-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Melt-Distribution.png b/doc/visual-programming/source/widgets/data/images/Melt-Distribution.png deleted file mode 100644 index 5af5c8773b4..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Melt-Distribution.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Melt-Workflow.png b/doc/visual-programming/source/widgets/data/images/Melt-Workflow.png deleted file mode 100644 index 5ace5665916..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Melt-Workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Merge-Data-stamped.png 
b/doc/visual-programming/source/widgets/data/images/Merge-Data-stamped.png deleted file mode 100644 index a2772453720..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Merge-Data-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Merge-Data_Example.png b/doc/visual-programming/source/widgets/data/images/Merge-Data_Example.png deleted file mode 100644 index 83d5f94caeb..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Merge-Data_Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Merge-Data_excel.png b/doc/visual-programming/source/widgets/data/images/Merge-Data_excel.png deleted file mode 100644 index 9e4425fefaa..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Merge-Data_excel.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData-Example.png b/doc/visual-programming/source/widgets/data/images/MergeData-Example.png deleted file mode 100644 index 0a80d816d41..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData-Example2.png b/doc/visual-programming/source/widgets/data/images/MergeData-Example2.png deleted file mode 100644 index 4e8f9f7fbe7..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData-Example3.png b/doc/visual-programming/source/widgets/data/images/MergeData-Example3.png deleted file mode 100644 index fef3b1286f0..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData-Example3.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData-InstanceID.png 
b/doc/visual-programming/source/widgets/data/images/MergeData-InstanceID.png deleted file mode 100644 index 9b2040357ef..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData-InstanceID.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData-multiple.png b/doc/visual-programming/source/widgets/data/images/MergeData-multiple.png deleted file mode 100644 index 4b6d5434daf..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData-multiple.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData-multiple2.png b/doc/visual-programming/source/widgets/data/images/MergeData-multiple2.png deleted file mode 100644 index 8752ce6bc69..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData-multiple2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData-stamped.png b/doc/visual-programming/source/widgets/data/images/MergeData-stamped.png deleted file mode 100644 index fb51525ded6..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData_Append.png b/doc/visual-programming/source/widgets/data/images/MergeData_Append.png deleted file mode 100644 index 4afec48547f..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData_Append.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData_Concatenate.png b/doc/visual-programming/source/widgets/data/images/MergeData_Concatenate.png deleted file mode 100644 index 55db366c4c2..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData_Concatenate.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/MergeData_Intersection.png 
b/doc/visual-programming/source/widgets/data/images/MergeData_Intersection.png deleted file mode 100644 index 2a9323c6d5c..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/MergeData_Intersection.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Outliers-Example.png b/doc/visual-programming/source/widgets/data/images/Outliers-Example.png deleted file mode 100644 index 0271a494fef..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Outliers-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Outliers-stamped.png b/doc/visual-programming/source/widgets/data/images/Outliers-stamped.png deleted file mode 100644 index 3acaa491f06..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Outliers-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Outliers.png b/doc/visual-programming/source/widgets/data/images/Outliers.png deleted file mode 100644 index 4956da6670f..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Outliers.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PaintData-Example.png b/doc/visual-programming/source/widgets/data/images/PaintData-Example.png deleted file mode 100644 index 1238d933560..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PaintData-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PaintData-stamped.png b/doc/visual-programming/source/widgets/data/images/PaintData-stamped.png deleted file mode 100644 index e98f6946ac0..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PaintData-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PaintData.png b/doc/visual-programming/source/widgets/data/images/PaintData.png deleted file mode 100644 
index dbf49ec6363..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PaintData.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Pivot-continuous.png b/doc/visual-programming/source/widgets/data/images/Pivot-continuous.png deleted file mode 100644 index 1c7be5202d0..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Pivot-continuous.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Pivot-discrete.png b/doc/visual-programming/source/widgets/data/images/Pivot-discrete.png deleted file mode 100644 index 0df3a4bb6a8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Pivot-discrete.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Pivot-example.png b/doc/visual-programming/source/widgets/data/images/Pivot-example.png deleted file mode 100644 index 6b51cf2f956..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Pivot-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Pivot-stamped.png b/doc/visual-programming/source/widgets/data/images/Pivot-stamped.png deleted file mode 100644 index 2113158d1d6..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Pivot-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Preprocess-Example1.png b/doc/visual-programming/source/widgets/data/images/Preprocess-Example1.png deleted file mode 100644 index 99b71b48e71..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Preprocess-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Preprocess-Example2.png b/doc/visual-programming/source/widgets/data/images/Preprocess-Example2.png deleted file mode 100644 index 7cb03aa5b0a..00000000000 Binary files 
a/doc/visual-programming/source/widgets/data/images/Preprocess-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Preprocess-Models1.png b/doc/visual-programming/source/widgets/data/images/Preprocess-Models1.png deleted file mode 100644 index 4842233bb03..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Preprocess-Models1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Preprocess-Models2.png b/doc/visual-programming/source/widgets/data/images/Preprocess-Models2.png deleted file mode 100644 index 5810d43a8f6..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Preprocess-Models2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Preprocess-Models3.png b/doc/visual-programming/source/widgets/data/images/Preprocess-Models3.png deleted file mode 100644 index d873f821f2e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Preprocess-Models3.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Preprocess1.png b/doc/visual-programming/source/widgets/data/images/Preprocess1.png deleted file mode 100644 index 427dd6719a7..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Preprocess1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Preprocess2.png b/doc/visual-programming/source/widgets/data/images/Preprocess2.png deleted file mode 100644 index ce690749153..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Preprocess2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PurgeDomain-example.png b/doc/visual-programming/source/widgets/data/images/PurgeDomain-example.png deleted file mode 100644 index cdd40b50e38..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PurgeDomain-example.png and 
/dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PurgeDomain-stamped.png b/doc/visual-programming/source/widgets/data/images/PurgeDomain-stamped.png deleted file mode 100644 index 319b6c074c5..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PurgeDomain-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PythonScript-Example3.png b/doc/visual-programming/source/widgets/data/images/PythonScript-Example3.png deleted file mode 100644 index febef3ecb37..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PythonScript-Example3.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PythonScript-filtering.png b/doc/visual-programming/source/widgets/data/images/PythonScript-filtering.png deleted file mode 100644 index 19699249faa..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PythonScript-filtering.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PythonScript-gauss.png b/doc/visual-programming/source/widgets/data/images/PythonScript-gauss.png deleted file mode 100644 index 3f4456a7a5a..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PythonScript-gauss.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PythonScript-round.png b/doc/visual-programming/source/widgets/data/images/PythonScript-round.png deleted file mode 100644 index 0e7c08882d5..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PythonScript-round.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/PythonScript-stamped.png b/doc/visual-programming/source/widgets/data/images/PythonScript-stamped.png deleted file mode 100644 index 5b5906db656..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PythonScript-stamped.png and /dev/null 
differ diff --git a/doc/visual-programming/source/widgets/data/images/PythonScript.png b/doc/visual-programming/source/widgets/data/images/PythonScript.png deleted file mode 100644 index 2dc4db920a9..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/PythonScript.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Randomize-Default.png b/doc/visual-programming/source/widgets/data/images/Randomize-Default.png deleted file mode 100644 index e091e530e85..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Randomize-Default.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Randomize-Example1.png b/doc/visual-programming/source/widgets/data/images/Randomize-Example1.png deleted file mode 100644 index f71d9138795..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Randomize-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Randomize-Example2.png b/doc/visual-programming/source/widgets/data/images/Randomize-Example2.png deleted file mode 100644 index f1d956c3f3c..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Randomize-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Rank-Select-Schema.png b/doc/visual-programming/source/widgets/data/images/Rank-Select-Schema.png deleted file mode 100644 index 253b4203d78..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Rank-Select-Schema.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Rank-Select-Widgets.png b/doc/visual-programming/source/widgets/data/images/Rank-Select-Widgets.png deleted file mode 100644 index 500bfef99a6..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Rank-Select-Widgets.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/data/images/Rank-and-Test.png b/doc/visual-programming/source/widgets/data/images/Rank-and-Test.png deleted file mode 100644 index 6ab41010ea5..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Rank-and-Test.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Rank-stamped.png b/doc/visual-programming/source/widgets/data/images/Rank-stamped.png deleted file mode 100644 index 834ec26fe2a..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Rank-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SQLTable-Example.png b/doc/visual-programming/source/widgets/data/images/SQLTable-Example.png deleted file mode 100644 index 7fad076a1a4..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SQLTable-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SQLTable-stamped.png b/doc/visual-programming/source/widgets/data/images/SQLTable-stamped.png deleted file mode 100644 index 123f3171cd9..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SQLTable-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Save-Workflow.png b/doc/visual-programming/source/widgets/data/images/Save-Workflow.png deleted file mode 100644 index c565596b2df..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Save-Workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SaveData.png b/doc/visual-programming/source/widgets/data/images/SaveData.png deleted file mode 100644 index 6ed2933dcf1..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SaveData.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Select-by-Data-Index-Example1.png 
b/doc/visual-programming/source/widgets/data/images/Select-by-Data-Index-Example1.png deleted file mode 100644 index 0c7ad29b3f1..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Select-by-Data-Index-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Select-by-Data-Index-stamped.png b/doc/visual-programming/source/widgets/data/images/Select-by-Data-Index-stamped.png deleted file mode 100644 index 9de3a6b358f..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Select-by-Data-Index-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SelectColumns-Example1.png b/doc/visual-programming/source/widgets/data/images/SelectColumns-Example1.png deleted file mode 100644 index 53d3d89b8e4..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectColumns-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SelectColumns-Example2.png b/doc/visual-programming/source/widgets/data/images/SelectColumns-Example2.png deleted file mode 100644 index 00ef1928805..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectColumns-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SelectColumns-stamped.png b/doc/visual-programming/source/widgets/data/images/SelectColumns-stamped.png deleted file mode 100644 index 91751808b74..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectColumns-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SelectColumns2-Workflow.png b/doc/visual-programming/source/widgets/data/images/SelectColumns2-Workflow.png deleted file mode 100644 index 1d0f2d188ff..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectColumns2-Workflow.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/data/images/SelectRows-Example.png b/doc/visual-programming/source/widgets/data/images/SelectRows-Example.png deleted file mode 100644 index ab8065f57fe..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectRows-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SelectRows-Workflow.png b/doc/visual-programming/source/widgets/data/images/SelectRows-Workflow.png deleted file mode 100644 index 037c0a50b1c..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectRows-Workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SelectRows-schema.png b/doc/visual-programming/source/widgets/data/images/SelectRows-schema.png deleted file mode 100644 index 595a5b3f906..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectRows-schema.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/SelectRows-stamped.png b/doc/visual-programming/source/widgets/data/images/SelectRows-stamped.png deleted file mode 100644 index eceaf43993a..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/SelectRows-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Unique-Example.png b/doc/visual-programming/source/widgets/data/images/Unique-Example.png deleted file mode 100644 index 4b6e71bcb7c..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Unique-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/Unique-stamped.png b/doc/visual-programming/source/widgets/data/images/Unique-stamped.png deleted file mode 100644 index 58d4fe8f7a8..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/Unique-stamped.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/data/images/data-info-stamped.png b/doc/visual-programming/source/widgets/data/images/data-info-stamped.png deleted file mode 100644 index e36a825ea0d..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/data-info-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/feature-constructor1-stamped.png b/doc/visual-programming/source/widgets/data/images/feature-constructor1-stamped.png deleted file mode 100644 index f3e8672380d..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/feature-constructor1-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/feature-constructor1.png b/doc/visual-programming/source/widgets/data/images/feature-constructor1.png deleted file mode 100644 index feccd3dbf64..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/feature-constructor1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/feature-constructor2-stamped.png b/doc/visual-programming/source/widgets/data/images/feature-constructor2-stamped.png deleted file mode 100644 index 5a37459302c..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/feature-constructor2-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/feature_statistics-stamped.png b/doc/visual-programming/source/widgets/data/images/feature_statistics-stamped.png deleted file mode 100644 index 23ab51d7ba7..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/feature_statistics-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/feature_statistics_example1.png b/doc/visual-programming/source/widgets/data/images/feature_statistics_example1.png deleted file mode 100644 index c55073b00a1..00000000000 Binary files 
a/doc/visual-programming/source/widgets/data/images/feature_statistics_example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/feature_statistics_example2.png b/doc/visual-programming/source/widgets/data/images/feature_statistics_example2.png deleted file mode 100644 index a3aa56f2994..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/feature_statistics_example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/feature_statistics_workflow.png b/doc/visual-programming/source/widgets/data/images/feature_statistics_workflow.png deleted file mode 100644 index aa04253e2a6..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/feature_statistics_workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/impute-stamped.png b/doc/visual-programming/source/widgets/data/images/impute-stamped.png deleted file mode 100644 index 717f8264bc4..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/impute-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/neighbours-example-multiple.png b/doc/visual-programming/source/widgets/data/images/neighbours-example-multiple.png deleted file mode 100644 index 09e6a8b5119..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/neighbours-example-multiple.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/neighbours-example1.png b/doc/visual-programming/source/widgets/data/images/neighbours-example1.png deleted file mode 100644 index 3a0465d77cc..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/neighbours-example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/neighbours-example2.png b/doc/visual-programming/source/widgets/data/images/neighbours-example2.png deleted file mode 100644 
index 0a654d2eba7..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/neighbours-example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/neighbours-stamped.png b/doc/visual-programming/source/widgets/data/images/neighbours-stamped.png deleted file mode 100644 index a8ec4f1f524..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/neighbours-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/preprocess-stamped.png b/doc/visual-programming/source/widgets/data/images/preprocess-stamped.png deleted file mode 100644 index dc8840214f5..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/preprocess-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/spreadsheet-simple-head1.png b/doc/visual-programming/source/widgets/data/images/spreadsheet-simple-head1.png deleted file mode 100644 index e9bc80ce557..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/spreadsheet-simple-head1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/transpose-example.png b/doc/visual-programming/source/widgets/data/images/transpose-example.png deleted file mode 100644 index 9782461b6cb..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/transpose-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/images/transpose-stamped.png b/doc/visual-programming/source/widgets/data/images/transpose-stamped.png deleted file mode 100644 index 8387e54049e..00000000000 Binary files a/doc/visual-programming/source/widgets/data/images/transpose-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/data/impute.md b/doc/visual-programming/source/widgets/data/impute.md deleted file mode 100644 index fa850850718..00000000000 --- 
a/doc/visual-programming/source/widgets/data/impute.md +++ /dev/null @@ -1,38 +0,0 @@ -Impute -====== - -Replaces unknown values in the data. - -**Inputs** - -- Data: input dataset -- Learner: learning algorithm for imputation - -**Outputs** - -- Data: dataset with imputed values - -Some Orange's algorithms and visualizations cannot handle unknown values in the data. This widget does what statisticians call imputation: it substitutes missing values by values either computed from the data or set by the user. The default imputation is (1-NN). - -![](images/impute-stamped.png) - -1. In the top-most box, *Default method*, the user can specify a general imputation technique for all attributes. - - **Don't Impute** does nothing with the missing values. - - **Average/Most-frequent** uses the average value (for continuous attributes) or the most common value (for discrete attributes). - - **As a distinct value** creates new values to substitute the missing ones. - - **Model-based imputer** constructs a model for predicting the missing value, based on values of other attributes; a separate model is constructed for each attribute. The default model is 1-NN learner, which takes the value from the most similar example (this is sometimes referred to as hot deck imputation). This algorithm can be substituted by one that the user connects to the input signal Learner for Imputation. Note, however, that if there are discrete and continuous attributes in the data, the algorithm needs to be capable of handling them both; at the moment only 1-NN learner can do that. (In the future, when Orange has more regressors, the Impute widget may have separate input signals for discrete and continuous models.) - - **Random values** computes the distributions of values for each attribute and then imputes by picking random values from them. - - **Remove examples with missing values** removes the example containing missing values. 
This check also applies to the class attribute if *Impute class values* is checked. - -2. It is possible to specify individual treatment for each attribute, which overrides the default treatment set. One can also specify a manually defined value used for imputation. In the screenshot, we decided not to impute the values of "*normalized-losses*" and "*make*", the missing values of "*aspiration*" will be replaced by random values, while the missing values of "*body-style*" and "*drive-wheels*" are replaced by "*hatchback*" and "*fwd*", respectively. If the values of "*length*", "*width*" or "*height*" are missing, the example is discarded. Values of all other attributes use the default method set above (model-based imputer, in our case). -3. The imputation methods for individual attributes are the same as default methods. -4. *Restore All to Default* resets the individual attribute treatments to default. -5. Produce a report. -6. All changes are committed immediately if *Apply automatically* is checked. Otherwise, *Apply* needs to be ticked to apply any new settings. - -Example -------- - -To demonstrate how the **Impute** widget works, we played around with the *Iris* dataset and deleted some of the data. We used the **Impute** widget and selected the *Model-based imputer* to impute the missing values. In another [Data Table](../data/datatable.md), we see how the question marks turned into distinct values ("Iris-setosa", "Iris-versicolor"). - -![](images/Impute-Example.png) diff --git a/doc/visual-programming/source/widgets/data/melt.md b/doc/visual-programming/source/widgets/data/melt.md deleted file mode 100644 index eaa43346c57..00000000000 --- a/doc/visual-programming/source/widgets/data/melt.md +++ /dev/null @@ -1,36 +0,0 @@ -Melt -========= - -Transform [wide data to narrow](https://en.wikipedia.org/wiki/Wide_and_narrow_data).
- -**Inputs** - -- Data: wide data table - -**Outputs** - -- Data: narrow data table - -The **Melt** widget receives a dataset in the more common wide format and outputs a table of (row_id, variable, value) triplets. - - -![](images/Melt-Default-stamped.png) - -1. Select the variable used as id. The widget offers only columns without duplicated values. Alternatively, row number can be used as id. -2. Select whether to include non-numeric variables, and whether to exclude zero values. -3. Set the names of the columns with name of the variable ("item") and the corresponding value. - -Example -------- - -In the following workflow we play with the Zoo data set, in which we convert all variables to numeric by treating them as ordinal. All variables except the number of legs are boolean (e.g. the animal lays or does not lay eggs), so a value of 1 will correspond to an animal having a particular feature. In data table we select all rows (Ctrl-A or Cmd-A) and deselect the duplicate description of the frog in order to avoid duplicate values in the "name" column. - -We pass it to Melt, where we designate the name as the row id, and discard zero values. The resulting table has multiple rows for each animal: one for each of the animal's features. - -An interesting immediate use for this is to pass this data to Distributions and see what are the most and the least common features of animals. - -![](images/Melt-Workflow.png) - -In the next example we show how shuffling class values influences model performance on the same dataset as above. - -![](images/Melt-Distribution.png) diff --git a/doc/visual-programming/source/widgets/data/mergedata.md b/doc/visual-programming/source/widgets/data/mergedata.md deleted file mode 100644 index 9ed91750c0f..00000000000 --- a/doc/visual-programming/source/widgets/data/mergedata.md +++ /dev/null @@ -1,105 +0,0 @@ -Merge Data -========== - -Merges two datasets, based on values of selected attributes.
- -**Inputs** - -- Data: input dataset -- Extra Data: additional dataset - -**Outputs** - -- Data: dataset with features added from extra data - -The **Merge Data** widget is used to horizontally merge two datasets, based on the values of selected attributes (columns). In the input, two datasets are required, data and extra data. Rows from the two data sets are matched by the values of pairs of attributes, chosen by the user. The widget produces one output. It corresponds to the instances from the input data to which attributes (columns) from input extra data are appended. - -To match by a combination of features click on the plus icon to add the features to merge on. - -Depending upon the merge types, selected features may be required to have unique values (that is, no duplicates) in the data. When merging by multiple features, this pertains to combinations of their values. - -![](images/Merge-Data-stamped.png) - -1. Information on main data. -2. Information on data to append. -3. Merging type: - - **Append columns from Extra Data** outputs all rows from the Data, augmented by the columns in the Extra Data. Rows without matches are retained, even where the data in the extra columns are missing. - - **Find matching pairs of rows** outputs rows from the Data, augmented by the columns in the Extra Data. Rows without matches are removed from the output. - - **Concatenate tables** treats both data sources symmetrically. The output is similar to the first option, except that non-matched values from Extra Data are appended at the end. -4. List of attributes from Data input. -5. List of attributes from Extra Data input. -6. Produce a report. - -Merging Types -------------- - -##### Append Columns from Extra Data (left join) - -Columns from the Extra Data are added to the Data. Instances with no matching rows will have missing values added. - -For example, the first table may contain city names and the second would be a list of cities and their coordinates.
Columns with coordinates would then be appended to the data with city names. Where city names cannot be matched, missing values will appear. - -In our example, the first Data input contained 6 cities, but the Extra Data did not provide Lat and Lon values for Bratislava, so the fields will be empty. - -For this type of merge, the values on the left (e.g. cities) may repeat (e.g. the same city appears multiple times), while the *used* value on the right must not. For example, let the right-hand table contain multiple Springfields. If Springfield does not appear on the left, the widget will show a warning but still merge the data. If Springfield does appear on the left as well, the widget will show an error. This can be resolved if both tables also include the data on the state (e.g. Illinois, Missouri, Oregon, Ohio) and this feature is added to the combination being matched. - -![](images/MergeData_Append.png) - -##### Find matching pairs of rows (inner join) - -Only those rows that are matched will be present on the output, with the Extra Data columns appended. Rows without matches are removed. - -In our example, Bratislava from the Data input did not have Lat and Lon values, while Belgrade from the Extra Data could not be found in the City column we were merging on. Hence both instances are removed - only the intersection of instances is sent to the output. - -For this type of merge, combinations of features on the left and on the right must be unique. - -![](images/MergeData_Intersection.png) - -##### Concatenate tables (outer join) - -The rows from both the Data and the Extra Data will be present on the output. Where rows cannot be matched, missing values will appear. - -In our example, both Bratislava and Belgrade are now present. Bratislava will have missing Lat and Lon values, while Belgrade will have a missing Population value. - -For this type of merge, combinations of features on the left and on the right must be unique.
- -![](images/MergeData_Concatenate.png) - -##### Row index - -Data will be merged in the same order as they appear in the table. Row number 1 from the Data input will be joined with row number 1 from the Extra Data input. Row numbers are assigned by Orange based on the original order of the data instances. - -##### Instance ID - -This is a more complex option. Sometimes, data is transformed in the analysis and the domain is no longer the same. Nevertheless, the original row indices are still present in the background (Orange remembers them). In this case one can merge on instance ID. For example, say you transformed the data with PCA, visualized it in the Scatter Plot, selected some data instances and now you wish to see the original information of the selected subset. Connect the output of Scatter Plot to Merge Data, add the original data set as Extra Data and merge by Instance ID. - -![](images/MergeData-InstanceID.png) - -##### Merge by two or more attributes - -Sometimes our data instances are unique with respect to a combination of columns, not a single column. To merge by more than a single column, add the *Row matching* condition by pressing plus next to the matching condition. To remove it, press the x. - -In the below example, we are merging by *student* column and *class* column. - -![](images/MergeData-multiple.png) - -Say we have two data sets with student names and the class they're in. The first data set has students' grades and the second has the elective course they have chosen. Unfortunately, there are two Jacks in our data, one from class A and the other from class B. Same for Jane. - -To distinguish between the two, we can match rows on both, the student's name and her class. - -![](images/MergeData-multiple2.png) - -Examples --------- - -Merging two datasets results in appending new attributes to the original file, based on a selected common attribute.
In the example below, we wanted to merge the **zoo.tab** file containing only factual data with [zoo-with-images.tab](http://file.biolab.si/datasets/zoo-with-images.tab) containing images. Both files share a common string attribute *names*. Now, we create a workflow connecting the two files. The *zoo.tab* data is connected to **Data** input of the **Merge Data** widget, and the *zoo-with-images.tab* data to the **Extra Data** input. Outputs of the **Merge Data** widget is then connected to the [Data Table](../data/datatable.md) widget. In the latter, the **Merged Data** channels are shown, where image attributes are added to the original data. - -![](images/MergeData-Example.png) - -The case where we want to include all instances in the output, even those where no match by attribute *names* was found, is shown in the following workflow. - -![](images/MergeData-Example2.png) - -The third type of merging is shown in the next workflow. The output consists of both inputs, with unknown values assigned where no match was found. - -![](images/MergeData-Example3.png) diff --git a/doc/visual-programming/source/widgets/data/neighbors.md b/doc/visual-programming/source/widgets/data/neighbors.md deleted file mode 100644 index 423c64ee66c..00000000000 --- a/doc/visual-programming/source/widgets/data/neighbors.md +++ /dev/null @@ -1,42 +0,0 @@ -Neighbors -========= - -Compute nearest neighbors in data according to reference. - -**Inputs** - -- Data: An input data set. -- Reference: A reference data for neighbor computation. - -**Outputs** - -- Neighbors: A data table of nearest neighbors according to reference. - -The **Neighbors** widget computes nearest neighbors for a given reference and for a given distance measure. The reference can be either one instance or more instances. In the case with one reference widget outputs closest `n` instances from data where `n` is set by the **Number of neighbors** option in the widget. 
When reference contains more instances widget computes the combined distance for each data instance as a minimum of distances to each reference. Widget outputs `n` data instances with lowest combined distance. - -![](images/neighbours-stamped.png) - -1. Distance measure for computing neighbors. Supported measures are: Euclidean, Manhattan, Mahalanobis, Cosine, Jaccard, Spearman, absolute Spearman, Pearson, absolute Pearson. -2. Number of neighbors on the output. -3. If *Exclude rows (equal to) references* is ticked, data instances that are highly similar to the reference (distance < 1e-5), will be excluded. -4. Click *Apply* to commit the changes. To communicate changes automatically tick *Apply Automatically*. -5. Status bar with access to widget help and information on the input and output data. - -Examples --------- - -In the first example, we used *iris* data and passed it to **Neighbors** and to [Data Table](../data/datatable.md). In **Data Table**, we selected an instance of iris, that will serve as our reference, meaning we wish to retrieve 10 closest examples to the select data instance. We connect **Data Table** to **Neighbors** as well. - -We can observe the results of neighbor computation in **Data Table (1)**, where we can see 10 closest images to our selected iris flower. - -![](images/neighbours-example1.png) - -Now change the selection **Data Table** to multiple examples. As a result, we get instances with closest combined distances to the references. The method computes the combined distance as a minimum of distances to each reference. - -![](images/neighbours-example-multiple.png) - -Another example requires the installation of Image Analytics add-on. We loaded 15 paintings from famous painters with **Import Images** widget and passed them to **Image Embedding**, where we selected *Painters* embedder. - -Then the procedure is the same as above. 
We passed embedded images to **Image Viewer** and selected a painting from Monet to serve as our reference image. We passed the image to **Neighbors**, where we set the distance measure to *cosine*, ticked off *Exclude reference* and set the neighbors to 2. This allows us to find the actual closest neighbor to a reference painting and observe them side by side in **Image Viewer (1)**. - -![](images/neighbours-example2.png) diff --git a/doc/visual-programming/source/widgets/data/outliers.md b/doc/visual-programming/source/widgets/data/outliers.md deleted file mode 100644 index 580b3e2509b..00000000000 --- a/doc/visual-programming/source/widgets/data/outliers.md +++ /dev/null @@ -1,49 +0,0 @@ -Outliers -======== - -Outlier detection widget. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Outliers: instances scored as outliers -- Inliers: instances not scored as outliers -- Data: input dataset appended *Outlier* variable - -The **Outliers** widget applies one of the four methods for outlier detection. All methods apply classification to the dataset. *One-class SVM with non-linear kernels (RBF)* performs well with non-Gaussian distributions, while *Covariance estimator* works only for data with Gaussian distribution. One efficient way to perform outlier detection on moderately high dimensional datasets is to use the *Local Outlier Factor* algorithm. The algorithm computes a score reflecting the degree of abnormality of the observations. It measures the local density deviation of a given data point with respect to its neighbors. Another efficient way of performing outlier detection in high-dimensional datasets is to use random forests (*Isolation Forest*). - -![](images/Outliers-stamped.png) - -1. 
Method for outlier detection: - - [One Class SVM](http://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html) - - [Covariance Estimator](http://scikit-learn.org/stable/modules/generated/sklearn.covariance.EllipticEnvelope.html) - - [Local Outlier Factor](http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.LocalOutlierFactor.html) - - [Isolation Forest](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html) -2. Set parameters for the method: - - **One class SVM with non-linear kernel (RBF)**: classifies data as similar or different from the core class: - - *Nu* is a parameter for the upper bound on the fraction of training errors and a lower bound of the fraction of support vectors - - *Kernel coefficient* is a gamma parameter, which specifies how much influence a single data instance has - - **Covariance estimator**: fits an ellipse to central points with Mahalanobis distance metric: - - *Contamination* is the proportion of outliers in the dataset - - *Support fraction* specifies the proportion of points included in the estimate - - **Local Outlier Factor**: obtains local density from the k-nearest neighbors: - - *Contamination* is the proportion of outliers in the dataset - - *Neighbors* represents number of neighbors - - *Metric* is the distance measure - - **Isolation Forest**: isolates observations by randomly selecting a feature and then randomly selecting a split value between the maximum and minimum values of the selected feature: - - *Contamination* is the proportion of outliers in the dataset - - *Replicable training* fixes random seed -3. If *Apply automatically* is ticked, changes will be propagated automatically. Alternatively, click *Apply*. -4. Produce a report. -5. Number of instances on the input, followed by number of instances scored as inliers. - - -Example -------- - -Below is an example of how to use this widget.
We used subset (*versicolor* and *virginica* instances) of the *Iris* dataset to detect the outliers. We chose the *Local Outlier Factor* method, with *Euclidean* distance. Then we observed the annotated instances in the [Scatter Plot](../visualize/scatterplot.md) widget. In the next step we used the *setosa* instances to demonstrate novelty detection using [Apply Domain](../data/applydomain.md) widget. After concatenating both outputs we examined the outliers in the *Scatter Plot (1)*. - -![](images/Outliers-Example.png) diff --git a/doc/visual-programming/source/widgets/data/paintdata.md b/doc/visual-programming/source/widgets/data/paintdata.md deleted file mode 100644 index ca8dc87b2e0..00000000000 --- a/doc/visual-programming/source/widgets/data/paintdata.md +++ /dev/null @@ -1,26 +0,0 @@ -Paint Data -========== - -Paints data on a 2D plane. You can place individual data points or use a brush to paint larger datasets. - -**Outputs** - -- Data: dataset as painted in the plot - -The widget supports the creation of a new dataset by visually placing data points on a two-dimension plane. Data points can be placed on the plane individually (*Put*) or in a larger number by brushing (*Brush*). Data points can belong to classes if the data is intended to be used in supervised learning. - -![](images/PaintData-stamped.png) - -1. Name the axes and select a class to paint data instances. You can add or remove classes. Use only one class to create classless, unsupervised datasets. -2. Drawing tools. Paint data points with *Brush* (multiple data instances) or *Put* (individual data instance). Select data points with *Select* and remove them with the Delete/Backspace key. Reposition data points with [Jitter](https://en.wikipedia.org/wiki/Jitter) (spread) and *Magnet* (focus). Use *Zoom* and scroll to zoom in or out. Below, set the radius and intensity for Brush, Put, Jitter and Magnet tools. -3. Reset to Input Data. -4. 
*Save Image* saves the image to your computer in a .svg or .png format. -5. Produce a report. -6. Tick the box on the left to automatically commit changes to other widgets. Alternatively, press *Send* to apply them. - -Example -------- - -In the example below, we have painted a dataset with 4 classes. Such dataset is great for demonstrating k-means and hierarchical clustering methods. In the screenshot, we see that [k-Means](../unsupervised/kmeans.md), overall, recognizes clusters better than [Hierarchical Clustering](../unsupervised/hierarchicalclustering.md). It returns a score rank, where the best score (the one with the highest value) means the most likely number of clusters. Hierarchical clustering, however, doesn’t group the right classes together. This is a great tool for learning and exploring statistical concepts. - -![](images/PaintData-Example.png) diff --git a/doc/visual-programming/source/widgets/data/pivot.md b/doc/visual-programming/source/widgets/data/pivot.md deleted file mode 100644 index 297d133b8c2..00000000000 --- a/doc/visual-programming/source/widgets/data/pivot.md +++ /dev/null @@ -1,68 +0,0 @@ -Pivot Table -=========== - -Reshape data table based on column values. - -**Inputs** - -- Data: input data set - -**Outputs** - -- Pivot Table: contingency matrix as shown in the widget -- Filtered Data: subset selected from the plot -- Grouped Data: aggregates over groups defined by row values - -**Pivot Table** summarizes the data of a more extensive table into a table of statistics. The statistics can include sums, averages, counts, etc. The widget also allows selecting a subset from the table and grouping by row values, which have to be a discrete variable. Data with only numeric variables cannot be displayed in the table. - -![](images/Pivot-stamped.png) - -1. Discrete or numeric variable used for row values. Numeric variables are considered as integers. -2. Discrete variable used for column values. 
Variable values will appear as columns in the table. -3. Values used for aggregation. Aggregated values will appear as cells in the table. -4. Aggregation methods: - - For any variable type: - - *Count*: number of instances with the given row and column value. - - *Count defined*: number of instances where the aggregation value is defined. - - For numeric variables: - - *Sum*: sum of values. - - *Mean*: average of values. - - *Mode*: most frequent value of the subset. - - *Min*: smallest value. - - *Max*: highest value. - - *Median*: middle value. - - *Var*: variance of the subset. - - For discrete variables: - - *Majority*: most frequent value of the subset. -5. Tick the box on the left to automatically output any changes. Alternatively, press *Apply* . - -Discrete variables ------------------- - -![](images/Pivot-discrete.png) - -Example of a pivot table with only discrete variables selected. We are using *heart-disease* data set for this example. Rows correspond to values of *diameter narrowing* variable. Our columns are values of *gender*, namely female and male. We are using *thal* as values in our cells. - -We have selected *Count* and *Majority* as aggregation methods. In the pivot table, we can see the number of instances that do not have diameter narrowing and are female. There are 72 such patients. Concurrently, there are 92 male patients that don't have diameter narrowing. Thal values don't have any effect here, we are just counting occurrences in the data. - -The second row shows majority. This means most female patients that don't have diameter narrowing have normal thal results. Conversely, female patients that have diameter narrowing most often have reversable defect. - -Numeric variables ------------------ - -![](images/Pivot-continuous.png) - -Example of a pivot table with numeric variables. We are using *heart-disease* data set for this example. Rows correspond to values of *diameter narrowing* variable. 
Our columns are values of *gender*, namely female and male. We are using *rest SBP* as values in our cells. - -We have selected *Count*, *Sum* and *Median* as aggregation methods. Under *Count*, we see there are 72 female patients that don't have diameter narrowing, same as before for discrete values. What is different are the sum and median aggregations. We see that the sum of resting systolic blood pressure for female patients that don't have diameter narrowing is 9269 and the median value is 130. - -Example -------- - -We are using *Forest Fires* for this example. The data is loaded in the [Datasets](../data/datasets.md) widget and passed to **Pivot Table**. *Forest Fires* datasets reports forest fires by the month and day they happened. We can aggregate all occurrences of forest fires by selecting *Count* as aggregation method and using *month* as row and *day* as column values. Since we are using *Count*, *Values* variable will have no effect. - -We can plot the counts in [Line Plot](../visualize/lineplot.md). But first, let us organize our data a bit. With [Edit Domain](../data/editdomain.md), we will reorder rows values so that months will appear in the correct order, namely from January to December. To do the same for columns, we will use [Select Columns](../data/selectcolumns.md) and reorder day to go from Monday to Sunday. - -Finally, our data is ready. Let us pass it to **Line Plot**. We can see that forest fires are most common in August and September, while their frequency is higher during the weekend than during weekdays. - -![](images/Pivot-example.png) diff --git a/doc/visual-programming/source/widgets/data/preprocess.md b/doc/visual-programming/source/widgets/data/preprocess.md deleted file mode 100644 index b1950baba4f..00000000000 --- a/doc/visual-programming/source/widgets/data/preprocess.md +++ /dev/null @@ -1,90 +0,0 @@ -Preprocess -========== - -Preprocesses data with selected methods. 
- -**Inputs** - -- Data: input dataset - -**Outputs** - -- Preprocessor: preprocessing method -- Preprocessed Data: data preprocessed with selected methods - -Preprocessing is crucial for achieving better-quality analysis results. The **Preprocess** widget offers several preprocessing methods that can be combined in a single preprocessing pipeline. Some methods are available as separate widgets, which offer advanced techniques and greater parameter tuning. - -![](images/preprocess-stamped.png) - -1. List of preprocessors. Double click the preprocessors you wish to use and shuffle their order by dragging them up or down. You can also add preprocessors by dragging them from the left menu to the right. -2. Preprocessing pipeline. -3. When the box is ticked (*Send Automatically*), the widget will communicate changes automatically. Alternatively, click *Send*. - -Preprocessors -------------- - -![](images/Preprocess1.png) - -1. List of preprocessors. -2. Discretization of continuous values: - - [Entropy-MDL discretization](http://sci2s.ugr.es/keel/pdf/algorithm/congreso/fayyad1993.pdf) by Fayyad and Irani that uses [expected information](http://kevinmeurer.com/a-simple-guide-to-entropy-based-discretization/) to determine bins. - - *Equal frequency discretization* splits by frequency (same number of instances in each bin). - - *Equal width discretization* creates bins of equal width (span of each bin is the same). - - *Remove numeric features* altogether. -3. Continuization of discrete values: - - *Most frequent as base* treats the most frequent discrete value as 0 and others as 1. For discrete attributes with more than 2 values, the most frequent value will be considered as a base and contrasted with remaining values in corresponding columns. - - *One feature per value* creates columns for each value, placing 1 where an instance has that value and 0 where it doesn't.
Essentially [One Hot Encoding](http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html). - - *Remove non-binary features* retains only categorical features that have values of either 0 or 1 and transforms them into continuous. - - *Remove categorical features* removes categorical features altogether. - - *Treat as ordinal* takes discrete values and treats them as numbers. If discrete values are categories, each category will be assigned a number as they appear in the data. - - *Divide by number of values* is similar to treat as ordinal, but the final values will be divided by the total number of values and hence the range of the new continuous variable will be [0, 1]. -4. Impute missing values: - - *Average/Most frequent* replaces missing values (NaN) with the average (for continuous) or most frequent (for discrete) value. - - *Replace with random value* replaces missing values with random ones within the range of each variable. - - *Remove rows with missing values*. -5. Select relevant features: - - Similar to [Rank](../data/rank.md), this preprocessor outputs only the most informative features. Score can be determined by information gain, [gain ratio](https://en.wikipedia.org/wiki/Information_gain_ratio), [gini index](https://en.wikipedia.org/wiki/Gini_coefficient), [ReliefF](https://en.wikipedia.org/wiki/Relief_(feature_selection)), [fast correlation based filter](https://www.aaai.org/Papers/ICML/2003/ICML03-111.pdf), [ANOVA](https://en.wikipedia.org/wiki/One-way_analysis_of_variance), [Chi2](https://en.wikipedia.org/wiki/Chi-squared_distribution), [RReliefF](http://lkm.fri.uni-lj.si/rmarko/papers/robnik03-mlj.pdf), and [Univariate Linear Regression](http://scikit-learn.org/stable/modules/feature_selection.html#feature-selection-using-selectfrommodel). - - *Strategy* refers to how many variables should be on the output. 
*Fixed* returns a fixed number of top scored variables, while *Percentile* return the selected top percent of the features. -6. *Select random features* outputs either a fixed number of features from the original data or a percentage. This is mainly used for advanced testing and educational purposes. - -![](images/Preprocess2.png) - -1. Normalize adjusts values to a common scale. Center values by mean or median or omit centering altogether. Similar for scaling, one can scale by SD (standard deviation), by span or not at all. -2. Randomize instances. Randomize classes shuffles class values and destroys connection between instances and class. Similarly, one can randomize features or meta data. If replicable shuffling is on, randomization results can be shared and repeated with a saved workflow. This is mainly used for advanced testing and educational purposes. -3. *Remove sparse features* retains features that have more than a number/percentage of non-zero/missing values. The rest are discarded. -4. Principal component analysis outputs results of a PCA transformation. Similar to the [PCA](../unsupervised/PCA.md) widget. -5. [CUR matrix decomposition](https://en.wikipedia.org/wiki/CUR_matrix_approximation) is a dimensionality reduction method, similar to SVD. - -Preprocessing for predictive modeling --------------------------------------- - -When building predictive models, one has to be careful about how to do preprocessing. There are two possible ways to do it in Orange, each slightly different: - -1. Connect **Preprocess** to the learner. This will override the default preprocessing pipeline for the learner and apply only custom preprocessing pipeline (default preprocessing steps are described in each learner's documentation). - - ![](images/Preprocess-Models1.png) - -2. Connect **Preprocess** to Test and Score. This will apply the preprocessors to each batch within cross-validation. Then the learner's preprocessors will be applied to the preprocessed subset. 
- - ![](images/Preprocess-Models2.png) - -Finally, there's a wrong way to do it. Connecting **Preprocess** directly to the original data and outputting the preprocessed data set will likely overfit the model. Don't do it. - - ![](images/Preprocess-Models3.png) - -Examples --------- - -In the first example, we have used the *heart_disease.tab* dataset available in the dropdown menu of the [File](../data/file.md) widget. Then we used **Preprocess** to impute missing values and normalize features. We can observe the changes in the [Data Table](../data/datatable.md) and compare it to the non-processed data. - -![](images/Preprocess-Example1.png) - -In the second example, we show how to use **Preprocess** for predictive modeling. - -This time we are using the *heart_disease.tab* data from the [File](../data/file.md) widget. You can access the data in the dropdown menu. This is a dataset with 303 patients that came to the doctor suffering from chest pain. After the tests were done, some patients were found to have diameter narrowing and others did not (this is our class variable). - -Some values are missing in our data set, so we would like to impute missing values before evaluating the model. We do this by passing a preprocessor directly to [Test and Score](../evaluate/testandscore.md). In **Preprocess**, we set the correct preprocessing pipeline (in our example only a single preprocessor with *Impute missing values*), then connect it to the Preprocessor input of Test and Score. - -We also pass the data and the learner (in this case, a [Logistic Regression](../model/logisticregression.md)). This is the correct way to pass a preprocessor to cross-validation as each fold will independently get preprocessed in the training phase. This is particularly important for feature selection.
- -![](images/Preprocess-Example2.png) diff --git a/doc/visual-programming/source/widgets/data/purgedomain.md b/doc/visual-programming/source/widgets/data/purgedomain.md deleted file mode 100644 index 22a284fb6bb..00000000000 --- a/doc/visual-programming/source/widgets/data/purgedomain.md +++ /dev/null @@ -1,41 +0,0 @@ -Purge Domain -============ - -Removes unused attribute values and useless attributes, sorts the remaining values. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: filtered dataset - -Definitions of nominal attributes sometimes contain values which don’t appear in the data. Even if this does not happen in the original data, filtering the data, selecting exemplary subsets and alike can remove all examples for which the attribute has some particular value. Such values clutter data presentation, especially various visualizations, and should be removed. - -After purging an attribute, it may become single-valued or, in extreme case, have no values at all (if the value of this attribute was undefined for all examples). In such cases, the attribute can be removed. - -A different issue is the order of attribute values: if the data is read from a file in a format in which values are not declared in advance, they are sorted “in order of appearance”. Sometimes we would prefer to have them sorted alphabetically. - -![](images/PurgeDomain-stamped.png) - -1. Purge attributes. -2. Purge classes. -3. Purge meta attributes. -4. Information on the filtering process. -5. Produce a report. -6. If *Apply automatically* is ticked, the widget will output data at - each change of widget settings. - -Such purification is done by the widget **Purge Domain**. Ordinary attributes and class attributes are treated separately. For each, we can decide if we want the values sorted or not. Next, we may allow the widget to remove attributes with less than two values or remove the class attribute if there are less than two classes. 
Finally, we can instruct the widget to check which values of attributes actually appear in the data and remove the unused values. The widget cannot remove values if it is not allowed to remove the attributes, since having attributes without values makes no sense. - -The new, reduced attributes get the prefix “R”, which distinguishes them from the original ones. The values of new attributes can be computed from the old ones, but not the other way around. This means that if you construct a classifier from the new attributes, you can use it to classify the examples described by the original attributes. But not the opposite: constructing a classifier from the old attributes and using it on examples described by the reduced ones won’t work. Fortunately, the latter is seldom the case. In a typical setup, one would explore the data, visualize it, filter it, purify it… and then test the final model on the original data. - -Example -------- - -The **Purge Domain** widget would typically appear after data filtering, for instance when selecting a subset of visualized examples. - -In the schema below, we play with the *adult.tab* dataset: we visualize it and select a portion of the data, which contains only four out of the five original classes. To get rid of the empty class, we put the data through **Purge Domain** before going on to the [Box Plot](../visualize/boxplot.md) widget. The latter shows only the four classes which are in the **Purge Domain** output. To see the effect of data purification, uncheck *Remove unused class variable values* and observe the effect this has on [Box Plot](../visualize/boxplot.md).
- -![](images/PurgeDomain-example.png) diff --git a/doc/visual-programming/source/widgets/data/pythonscript.md b/doc/visual-programming/source/widgets/data/pythonscript.md deleted file mode 100644 index 6c91b1bca28..00000000000 --- a/doc/visual-programming/source/widgets/data/pythonscript.md +++ /dev/null @@ -1,88 +0,0 @@ -Python Script -============= - -Extends functionalities through Python scripting. - -**Inputs** - -- Data (Orange.data.Table): input dataset bound to ``in_data`` variable -- Learner (Orange.classification.Learner): input learner bound to ``in_learner`` variable -- Classifier (Orange.classification.Learner): input classifier bound to ``in_classifier`` variable -- Object: input Python object bound to ``in_object`` variable - -**Outputs** - -- Data (Orange.data.Table): dataset retrieved from ``out_data`` variable -- Learner (Orange.classification.Learner): learner retrieved from ``out_learner`` variable -- Classifier (Orange.classification.Learner): classifier retrieved from ``out_classifier`` variable -- Object: Python object retrieved from ``out_object`` variable - -**Python Script** widget can be used to run a python script in the input, when a suitable functionality is not implemented in an existing widget. The script has ``in_data``, ``in_distance``, ``in_learner``, ``in_classifier`` and ``in_object`` variables (from input signals) in its local namespace. If a signal is not connected or it did not yet receive any data, those variables contain ``None``. - -After the script is executed variables from the script’s local namespace are extracted and used as outputs of the widget. The widget can be further connected to other widgets for visualizing the output. - -For instance the following script would simply pass on all signals it receives: - - out_data = in_data - out_distance = in_distance - out_learner = in_learner - out_classifier = in_classifier - out_object = in_object - -Note: You should not modify the input objects in place. 
- -![](images/PythonScript-stamped.png) - -1. Info box contains names of basic operators for Orange Python script. -2. The *Library* control can be used to manage multiple scripts. Pressing "+" will add a new entry and open it in the *Python script* editor. When the script is modified, its entry in the *Library* will change to indicate it has unsaved changes. Pressing *Update* will save the script (keyboard shortcut "Ctrl+S"). A script can be removed by selecting it and pressing the "-" button. -3. Pressing *Execute* in the *Run* box executes the script (keyboard shortcut "Ctrl+R"). Any script output (from ``print``) is captured and displayed in the *Console* below the script. -4. The *Python script* editor on the left can be used to edit a script (it supports some rudimentary syntax highlighting). -5. Console displays the output of the script. - -Examples --------- - -Python Script widget is intended to extend functionalities for advanced users. Classes from Orange library are described in the [documentation](https://docs.biolab.si/3/data-mining-library/#reference). To find further information about orange Table class see [Table](https://docs.biolab.si/3/data-mining-library/reference/data.table.html), [Domain](https://docs.biolab.si/3/data-mining-library/reference/data.domain.html), and [Variable](https://docs.biolab.si/3/data-mining-library/reference/data.variable.html) documentation. - -One can, for example, do batch filtering by attributes. We used zoo.tab for the example and we filtered out all the attributes that have more than 5 discrete values. This in our case removed only 'leg' attribute, but imagine an example where one would have many such attributes. 
- - from Orange.data import Domain, Table - domain = Domain([attr for attr in in_data.domain.attributes - if attr.is_continuous or len(attr.values) <= 5], - in_data.domain.class_vars) - out_data = Table(domain, in_data) - -![](images/PythonScript-filtering.png) - -The second example shows how to round all the values in a few lines of code. This time we used wine.tab and rounded all the values to whole numbers. - - import numpy as np - out_data = in_data.copy() - #copy, otherwise input data will be overwritten - np.round(out_data.X, 0, out_data.X) - -![](images/PythonScript-round.png) - -The third example introduces some Gaussian noise to the data. Again we make a copy of the input data, then walk through all the values with a double for loop and add random noise. - - import random - from Orange.data import Domain, Table - new_data = in_data.copy() - for inst in new_data: - for f in inst.domain.attributes: - inst[f] += random.gauss(0, 0.02) - out_data = new_data - -![](images/PythonScript-gauss.png) - -The final example uses Orange3-Text add-on. **Python Script** is very useful for custom preprocessing in text mining, extracting new features from strings, or utilizing advanced *nltk* or *gensim* functions. Below, we simply tokenized our input data from *deerwester.tab* by splitting them by whitespace. - - print('Running Preprocessing ...') - tokens = [doc.split(' ') for doc in in_data.documents] - print('Tokens:', tokens) - out_object = in_data - out_object.store_tokens(tokens) - -You can add a lot of other preprocessing steps to further adjust the output. The output of **Python Script** can be used with any widget that accepts the type of output your script produces. In this case, connection is green, which signalizes the right type of input for Word Cloud widget. 
- -![](images/PythonScript-Example3.png) diff --git a/doc/visual-programming/source/widgets/data/randomize.md b/doc/visual-programming/source/widgets/data/randomize.md deleted file mode 100644 index 672e03ac6c2..00000000000 --- a/doc/visual-programming/source/widgets/data/randomize.md +++ /dev/null @@ -1,33 +0,0 @@ -Randomize -========= - -Shuffles classes, attributes and/or metas of an input dataset. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: randomized dataset - -The **Randomize** widget receives a dataset in the input and outputs the same dataset in which the classes, attributes and/or metas are shuffled. - -![](images/Randomize-Default.png) - -1. Select group of columns of the dataset you want to shuffle. -2. Select proportion of the dataset you want to shuffle. -3. Produce replicable output. -4. If *Apply automatically* is ticked, changes are committed automatically. Otherwise, you have to press *Apply* after each change. -5. Produce a report. - -Example -------- - -The **Randomize** widget is usually placed right after, e.g., the [File](../data/file.md) widget. The basic usage is shown in the following workflow, where values of the class variable of the Iris dataset are randomly shuffled. - -![](images/Randomize-Example1.png) - -In the next example we show how shuffling class values influences model performance on the same dataset as above. - -![](images/Randomize-Example2.png) diff --git a/doc/visual-programming/source/widgets/data/rank.md b/doc/visual-programming/source/widgets/data/rank.md deleted file mode 100644 index 8e95e65cd2c..00000000000 --- a/doc/visual-programming/source/widgets/data/rank.md +++ /dev/null @@ -1,75 +0,0 @@ -Rank -==== - -Ranking of attributes in classification or regression datasets.
- -**Inputs** - -- Data: input dataset -- Scorer: models for feature scoring - -**Outputs** - -- Reduced Data: dataset with selected attributes -- Scores: data table with feature scores -- Features: list of attributes - -The **Rank** widget scores variables according to their correlation with a discrete or numeric target variable, based on applicable internal scorers (like information gain, chi-square and linear regression) and any connected external models that support scoring, such as linear regression, logistic regression, random forest, SGD, etc. The widget can also handle unsupervised data, but only by external scorers, such as PCA. - -![](images/Rank-stamped.png) - -1. Select scoring methods. See the options for classification, regression and unsupervised data in the **Scoring methods** section. -2. Select attributes to output. *None* won't output any attributes, while *All* will output all of them. With manual selection, select the attributes from the table on the right. *Best ranked* will output n best ranked attributes. - If *Send Automatically* is ticked, the widget automatically communicates changes to other widgets. -3. Status bar. Produce a report by clicking on the file icon. Observe input and output of the widget. On the right, warnings and errors are shown. - -Scoring methods (classification) --------------------------------- - -1. Information Gain: the expected amount of information (reduction of entropy) -2. [Gain Ratio](https://en.wikipedia.org/wiki/Information_gain_ratio): a ratio of the information gain and the attribute's intrinsic information, which reduces the bias towards multivalued features that occurs in information gain -3. [Gini](https://en.wikipedia.org/wiki/Gini_coefficient): the inequality among values of a frequency distribution -4. [ANOVA](https://en.wikipedia.org/wiki/One-way_analysis_of_variance): the difference between average values of the feature in different classes -5.
[Chi2](https://en.wikipedia.org/wiki/Chi-squared_distribution): dependence between the feature and the class as measured by the chi-square statistic -6. [ReliefF](https://en.wikipedia.org/wiki/Relief_(feature_selection)): the ability of an attribute to distinguish between classes on similar data instances -7. [FCBF (Fast Correlation Based Filter)](https://www.aaai.org/Papers/ICML/2003/ICML03-111.pdf): entropy-based measure, which also identifies redundancy due to pairwise correlations between features - -Additionally, you can connect certain learners that enable scoring the features according to how important they are in models that the learners build (e.g. [Logistic Regression](../model/logisticregression.md), [Random Forest](../model/randomforest.md), [SGD](../model/stochasticgradient.md)). Please note that the data is normalized before ranking. - -Scoring methods (regression) ----------------------------- - -1. [Univariate Regression](https://en.wikipedia.org/wiki/Simple_linear_regression): linear regression for a single variable -2. [RReliefF](http://www.clopinet.com/isabelle/Projects/reading/robnik97-icml.pdf): relative distance between the predicted (class) values of the two instances. - -Additionally, you can connect regression learners (e.g. [Linear Regression](../model/linearregression.md), [Random Forest](../model/randomforest.md), [SGD](../model/stochasticgradient.md)). Please note that the data is normalized before ranking. - -Scoring method (unsupervised) ------------------------------ - -Currently, only [PCA](../unsupervised/PCA.md) is supported for unsupervised data. Connect PCA to Rank to obtain the scores. The scores correspond to the correlation of a variable with the individual principal component. - -Scoring with learners ---------------------- - -Rank can also use certain learners for feature scoring. See [Learners as Scorers](../../learners-as-scorers/index.md) for an example. 
- -Example: Attribute Ranking and Selection ----------------------------------------- - -Below, we have used the **Rank** widget immediately after the [File](../data/file.md) widget to reduce the set of data attributes and include only the most informative ones: - -![](images/Rank-Select-Schema.png) - -Notice how the widget outputs a dataset that includes only the best-scored attributes: - -![](images/Rank-Select-Widgets.png) - -Example: Feature Subset Selection for Machine Learning ------------------------------------------------------- - -What follows is a bit more complicated example. In the workflow below, we first split the data into a training set and a test set. In the upper branch, the training data passes through the **Rank** widget to select the most informative attributes, while in the lower branch there is no feature selection. Both feature selected and original datasets are passed to their own [Test & Score](../evaluate/testandscore.md) widgets, which develop a *Naive Bayes* classifier and score it on a test set. - -![](images/Rank-and-Test.png) - -For datasets with many features, a naive Bayesian classifier feature selection, as shown above, would often yield a better predictive accuracy. diff --git a/doc/visual-programming/source/widgets/data/save.md b/doc/visual-programming/source/widgets/data/save.md deleted file mode 100644 index d52025b4358..00000000000 --- a/doc/visual-programming/source/widgets/data/save.md +++ /dev/null @@ -1,36 +0,0 @@ -Save Data -========= - -Saves data to a file. - -**Inputs** - -- Data: input dataset - -The **Save Data** widget considers a dataset provided in the input channel and saves it to a data file with a specified name. 
It can save the data as: - -- a tab-delimited file (.tab) -- comma-separated file (.csv) -- pickle (.pkl), used for storing preprocessing of [Corpus](https://orange.biolab.si/widget-catalog/text-mining/corpus-widget/) objects -- Excel spreadsheets (.xlsx) -- spectra ASCII (.dat) -- hyperspectral map ASCII (.xyz) -- compressed formats (.tab.gz, .csv.gz, .pkl.gz) - -The widget does not save the data every time it receives a new signal in the input as this would constantly (and, mostly, inadvertently) overwrite the file. Instead, the data is saved only after a new file name is set or the user pushes the *Save* button. - -If the file is saved to the same directory as the workflow or in the subtree of that directory, the widget remembers the relative path. Otherwise, it will store an absolute path but disable auto save for security reasons. - -![](images/SaveData.png) - -- *Add type annotations to header*: Include Orange's three-row header in the output file. -- *Autosave when receiving new data*: Always save new data. Be careful! This will overwrite existing data on your system. -- *Save* by overwriting the existing file. -- *Save as* to create a new file. - -Example -------- - -In the workflow below, we used the *Zoo* dataset. We loaded the data into the [Scatter Plot](../visualize/scatterplot.md) widget, with which we selected a subset of data instances and pushed them to the **Save Data** widget to store them in a file. - -![](images/Save-Workflow.png) diff --git a/doc/visual-programming/source/widgets/data/select-by-data-index.md b/doc/visual-programming/source/widgets/data/select-by-data-index.md deleted file mode 100644 index 794968a88e7..00000000000 --- a/doc/visual-programming/source/widgets/data/select-by-data-index.md +++ /dev/null @@ -1,33 +0,0 @@ -Select by Data Index -==================== - -Match instances by index from data subset. 
- -**Inputs** - -- Data: reference data set -- Data Subset: subset to match - -**Outputs** - -- Matching data: subset from reference data set that matches indices from subset data -- Unmatched data: subset from reference data set that does not match indices from subset data -- Annotated data: reference data set with an additional column defining matches - -**Select by Data Index** enables matching the data by indices. Each row in a data set has an index and given a subset, this widget can match these indices to indices from the reference data. Most often it is used to retrieve the original data from the transformed data (say, from PCA space). - -![](images/Select-by-Data-Index-stamped.png) - -1. Information on the reference data set. This data is used as index reference. -2. Information on the data subset. The indices of this data set are used to find matching data in the reference data set. Matching data are on the output by default. - -Example -------- - -A typical use of **Select by Data Index** is to retrieve the original data after a transformation. We will load *iris.tab* data in the [File](../data/file.md) widget. Then we will transform this data with [PCA](../unsupervised/PCA.md). We can project the transformed data in a [Scatter Plot](../visualize/scatterplot.md), where we can only see PCA components and not the original features. - -Now we will select an interesting subset (we could also select the entire data set). If we observe it in a [Data Table](../data/datatable.md), we can see that the data is transformed. If we would like to see this data with the original features, we will have to retrieve them with **Select by Data Index**. - -Connect the original data and the subset from [Scatter Plot](../visualize/scatterplot.md) to **Select by Data Index**. The widget will match the indices of the subset with the indices of the reference (original) data and output the matching reference data. 
A final inspection in another [Data Table](../data/datatable.md) confirms the data on the output is from the original data space. - -![](images/Select-by-Data-Index-Example1.png) diff --git a/doc/visual-programming/source/widgets/data/selectcolumns.md b/doc/visual-programming/source/widgets/data/selectcolumns.md deleted file mode 100644 index 7615d040076..00000000000 --- a/doc/visual-programming/source/widgets/data/selectcolumns.md +++ /dev/null @@ -1,38 +0,0 @@ -Select Columns -============== - -Manual selection of data attributes and composition of data domain. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with columns as set in the widget - -The **Select Columns** widget is used to manually compose your [data domain](https://en.wikipedia.org/wiki/Data_domain). The user can decide which attributes will be used and how. Orange distinguishes between ordinary attributes, (optional) class attributes and meta attributes. For instance, for building a classification model, the domain would be composed of a set of attributes and a discrete class attribute. Meta attributes are not used in modeling, but several widgets can use them as instance labels. - -Orange attributes have a type and are either discrete, continuous or a character string. The attribute type is marked with a symbol appearing before the name of the attribute (D, C, S, respectively). - -![](images/SelectColumns-stamped.png) - -1. Left-out data attributes that will not be in the output data file -2. Data attributes in the new data file -3. Target variable. If none, the new dataset will be without a target variable. -4. Meta attributes of the new data file. These attributes are included in the dataset but are, for most methods, not considered in the analysis. -5. Produce a report. -6. Reset the domain composition to that of the input data file. -7. Tick if you wish to auto-apply changes of the data domain. -8. 
Apply changes of the data domain and send the new data file to the output channel of the widget. - -Examples --------- - -In the workflow below, the *Iris* data from the [File](../data/file.md) widget is fed into the **Select Columns** widget, where we choose to output only two attributes (namely petal width and petal length). We view both the original dataset and the dataset with selected columns in the [Data Table](../data/datatable.md) widget. - -![](images/SelectColumns-Example1.png) - -For a more complex use of the widget, we composed a workflow to redefine the classification problem in the *heart-disease* dataset. Originally, the task was to predict if the patient has a coronary artery diameter narrowing. We changed the problem to that of gender classification, based on age, chest pain and cholesterol level, and informatively kept the diameter narrowing as a meta attribute. - -![](images/SelectColumns-Example2.png) diff --git a/doc/visual-programming/source/widgets/data/selectrows.md b/doc/visual-programming/source/widgets/data/selectrows.md deleted file mode 100644 index 91b4f94ebf2..00000000000 --- a/doc/visual-programming/source/widgets/data/selectrows.md +++ /dev/null @@ -1,46 +0,0 @@ -Select Rows -=========== - -Selects data instances based on conditions over data features. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Matching Data: instances that match the conditions -- Non-Matching Data: instances that do not match the conditions -- Data: data with an additional column showing whether an instance is selected - -This widget selects a subset from an input dataset, based on user-defined conditions. Instances that match the selection rule are placed in the output *Matching Data* channel. - -Criteria for data selection are presented as a collection of conjunct terms (i.e. selected items are those matching all the terms in '*Conditions*').
- -Condition terms are defined through selecting an attribute, selecting an operator from a list of operators, and, if needed, defining the value to be used in the condition term. Operators are different for discrete, continuous and string attributes. - -![](images/SelectRows-stamped.png) - -1. Conditions you want to apply, their operators and related values -2. Add a new condition to the list of conditions. -3. Add all the possible variables at once. -4. Remove all the listed variables at once. -5. Information on the input dataset and information on instances that match the condition(s) -6. Purge the output data. -7. When the *Send automatically* box is ticked, all changes will be automatically communicated to other widgets. -8. Produce a report. - -Any change in the composition of the condition will update the information pane (*Data Out*). - -If *Send automatically* is selected, then the output is updated on any change in the composition of the condition or any of its terms. - -Example -------- - -In the workflow below, we used the *Zoo* data from the [File](../data/file.md) widget and fed it into the **Select Rows** widget. In the widget, we chose to output only two animal types, namely fish and reptiles. We can inspect both the original dataset and the dataset with selected rows in the [Data Table](../data/datatable.md) widget. - -![](images/SelectRows-Example.png) - -In the next example, we used the data from the *Titanic* dataset and similarly fed it into the [Box Plot](../visualize/boxplot.md) widget. We first observed the entire dataset based on survival. Then we selected only first class passengers in the **Select Rows** widget and fed it again into the [Box Plot](../visualize/boxplot.md). There we could see all the first class passengers listed by their survival rate and grouped by gender. 
- -![](images/SelectRows-Workflow.png) diff --git a/doc/visual-programming/source/widgets/data/sqltable.md b/doc/visual-programming/source/widgets/data/sqltable.md deleted file mode 100644 index b50d188a509..00000000000 --- a/doc/visual-programming/source/widgets/data/sqltable.md +++ /dev/null @@ -1,57 +0,0 @@ - -SQL Table -========= - -Reads data from an SQL database. - -**Outputs** - -- Data: dataset from the database - -The **SQL** widget accesses data stored in an SQL database. It can connect to PostgreSQL (requires [psycopg2](http://initd.org/psycopg/) module) or [SQL Server](https://www.microsoft.com/en-us/sql-server/) (requires [pymssql](http://pymssql.org/en/stable/) module). - -To handle large databases, Orange attempts to execute a part of the computation in the database itself without downloading the data. This only works with PostgreSQL database and requires quantile and tsm_system_time [extensions](https://github.com/biolab/orange3/wiki/Installation-of-SQL-extensions) installed on server. If these extensions are not installed, the data will be downloaded locally. - -![](images/SQLTable-stamped.png) - -1. Database type (can be either PostgreSQL or MSSQL). -2. Host name. -3. Database name. -4. Username. -5. Password. -6. Press the blue button to connect to the database. Then select the table in the dropdown. -7. *Auto-discover categorical variables* will cast INT and CHAR columns with less than 20 distinct values as categorical variables (finding all distinct values can be slow on large tables). When not selected, INT will be treated as numeric and CHAR as text. *Download to local memory* downloads the selected table to your local machine. - -## Installation Instructions - -### PostgreSQL - -Install the backend. - - pip install psycopg2 - -Alternatively, you can follow [these instructions](https://blog.biolab.si/2018/02/16/how-to-enable-sql-widget-in-orange/) for installing the backend. 
- -If the installation of `psycopg2` fails, follow the instructions in the error message you get (it explains how to solve the error) or install an already compiled version of `psycopg2-binary` package: - - pip install psycopg2-binary - -Note: `psycopg2-binary` comes with its own versions of a few C libraries, among which libpq and libssl, which will be used regardless of other libraries available on the client: upgrading the system libraries will not upgrade the libraries used by psycopg2. Please build psycopg2 from source if you want to maintain binary upgradeability. - -[Install the extensions](https://github.com/biolab/orange3/wiki/Installation-of-SQL-extensions). [optional] - -### MSSQL - -Install the backend. - - pip install pymssql - -If you are encountering issues, follow [these instructions](https://github.com/biolab/orange3/wiki/Installation-of-SQL-extensions#mssql). - -## Example - -Here is a simple example on how to use the **SQL Table** widget. Place the widget on the canvas, enter your database credentials and connect to your database. Then select the table you wish to analyse. - -Connect **SQL Table** to [Data Table](../data/datatable.md) widget to inspect the output. If the table is populated, your data has transferred correctly. Now, you can use the **SQL Table** widget in the same way as the [File](../data/file.md) widget. - -![](images/SQLTable-Example.png) diff --git a/doc/visual-programming/source/widgets/data/transpose.md b/doc/visual-programming/source/widgets/data/transpose.md deleted file mode 100644 index f51cbee9e0b..00000000000 --- a/doc/visual-programming/source/widgets/data/transpose.md +++ /dev/null @@ -1,23 +0,0 @@ -Transpose -========= - -Transposes a data table. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: transposed dataset - -**Transpose** widget transposes data table. - -![](images/transpose-stamped.png) - -Example -------- - -This is a simple workflow showing how to use **Transpose**.
Connect the widget to [File](../data/file.md) widget. The output of **Transpose** is a transposed data table with rows as columns and columns as rows. You can observe the result in a [Data Table](../data/datatable.md). - -![](images/transpose-example.png) diff --git a/doc/visual-programming/source/widgets/data/unique.md b/doc/visual-programming/source/widgets/data/unique.md deleted file mode 100644 index a4135857d8f..00000000000 --- a/doc/visual-programming/source/widgets/data/unique.md +++ /dev/null @@ -1,26 +0,0 @@ -Unique -====== - -Remove duplicated data instances. - -**Inputs** - -- Data: data table - -**Outputs** - -- Data: data table without duplicates - -The widget removes duplicated data instances. The user can choose a subset of observed variables, so two instances are considered as duplicates although they may differ in values of other, ignored variables. - -![](images/Unique-stamped.png) - -1. Select the variables that are considered in comparing data instances. -2. Data instance that is kept. The options are to use the first, last, middle or random instance, or to keep none, that is, to remove duplicated instances altogether. - -Example -------- - -Data set *Zoo* contains two frogs. This workflow keeps only one by removing instances with the same names. 
- -![](images/Unique-Example.png) \ No newline at end of file diff --git a/doc/visual-programming/source/widgets/data/zoo-first.tab b/doc/visual-programming/source/widgets/data/zoo-first.tab deleted file mode 100644 index caaedd5c8e0..00000000000 --- a/doc/visual-programming/source/widgets/data/zoo-first.tab +++ /dev/null @@ -1,9 +0,0 @@ -name hair feathers eggs milk airborne aquatic predator toothed backbone breathes venomous fins legs tail domestic catsize type -string d d d d d d d d d d d d d d d d d - class -aardvark 1 0 0 1 0 0 1 1 1 1 0 0 4 0 0 1 mammal -antelope 1 0 0 1 0 0 0 1 1 1 0 0 4 1 0 1 mammal -bass 0 0 1 0 0 1 1 1 1 0 0 1 0 1 0 0 fish -bear 1 0 0 1 0 0 1 1 1 1 0 0 4 0 0 1 mammal -boar 1 0 0 1 0 0 1 1 1 1 0 0 4 1 0 1 mammal -buffalo 1 0 0 1 0 0 0 1 1 1 0 0 4 1 0 1 mammal diff --git a/doc/visual-programming/source/widgets/data/zoo-only-images.tab b/doc/visual-programming/source/widgets/data/zoo-only-images.tab deleted file mode 100644 index a0c6d3f2788..00000000000 --- a/doc/visual-programming/source/widgets/data/zoo-only-images.tab +++ /dev/null @@ -1 +0,0 @@ -name images string string meta meta type=image antelope http://icons.iconarchive.com/icons/joseph-aeron/us-fish-and-wildlife-service/128/antelope-icon.png bass http://images2.fanpop.com/images/photos/5700000/Largemouth-Bass-fishing-5708828-120-106.jpg bear http://icons.iconarchive.com/icons/iconshock/alaska/256/Polar-Bear-icon.png boar https://pbs.twimg.com/profile_images/2967299392/e0aa28ab427452deacc567a8f2816b3f.jpeg carp http://www.landbigfish.com/images/fish/LBF_Common_Carp.jpg catfish http://www.agfc.com/speciesPhotos/fish_catfish_yellowbullhead.jpg chicken http://latimesblogs.latimes.com/.a/6a00d8341c630a53ef01156f75357a970c-400wi deer http://icons.iconarchive.com/icons/joseph-aeron/us-fish-and-wildlife-service/128/deer-icon.png dolphin http://f0.pepst.com/c/D7D076/36618/ssc3/home/023/deepakjain/albums/dolphin_14kb.jpg_480_480_0_64000_0_1_0.jpg duck 
http://i.dailymail.co.uk/i/pix/2009/05/21/article-1185197-05075EEF000005DC-380_468x286.jpg gull http://www.allaboutbirds.org/guide/PHOTO/LARGE/herring_gull_adult_breeding2.jpg haddock http://www.oceantrawlers.com/sites/default/files/haddock_0.png?1317994124 hamster http://img3.wikia.nocookie.net/__cb20130325185045/animalcrossing/images/4/49/Tumblr_lvrcmvCpsS1qbeyouo1_500.jpg kiwi http://i.telegraph.co.uk/multimedia/archive/01891/kiwi_1891642c.jpg mink http://i.dailymail.co.uk/i/pix/2012/05/15/article-2144681-0D4F15C800000578-601_468x370.jpg \ No newline at end of file diff --git a/doc/visual-programming/source/widgets/data/zoo-second.tab b/doc/visual-programming/source/widgets/data/zoo-second.tab deleted file mode 100644 index 6be93d2af88..00000000000 --- a/doc/visual-programming/source/widgets/data/zoo-second.tab +++ /dev/null @@ -1,14 +0,0 @@ -name hair feathers eggs milk airborne aquatic predator toothed backbone breathes venomous fins legs tail domestic catsize type -string d d d d d d d d d d d d d d d d d - class -calf 1 0 0 1 0 0 0 1 1 1 0 0 4 1 1 1 mammal -carp 0 0 1 0 0 1 0 1 1 0 0 1 0 1 1 0 fish -catfish 0 0 1 0 0 1 1 1 1 0 0 1 0 1 0 0 fish -cavy 1 0 0 1 0 0 0 1 1 1 0 0 4 0 1 0 mammal -cheetah 1 0 0 1 0 0 1 1 1 1 0 0 4 1 0 1 mammal -chicken 0 1 1 0 1 0 0 0 1 1 0 0 2 1 1 0 bird -chub 0 0 1 0 0 1 1 1 1 0 0 1 0 1 0 0 fish -clam 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 invertebrate -crab 0 0 1 0 0 1 1 0 0 0 0 0 4 0 0 0 invertebrate -crayfish 0 0 1 0 0 1 1 0 0 0 0 0 6 0 0 0 invertebrate -crow 0 1 1 0 1 0 1 0 1 1 0 0 2 1 0 0 bird \ No newline at end of file diff --git a/doc/visual-programming/source/widgets/data/zoo-with-images.tab b/doc/visual-programming/source/widgets/data/zoo-with-images.tab deleted file mode 100644 index a779be7c0c5..00000000000 --- a/doc/visual-programming/source/widgets/data/zoo-with-images.tab +++ /dev/null @@ -1 +0,0 @@ -catsize predator aquatic fins feathers type name images d d d d d d string string class meta meta type=image 1 0 0 0 0 mammal 
antelope http://icons.iconarchive.com/icons/joseph-aeron/us-fish-and-wildlife-service/128/antelope-icon.png 0 1 1 1 0 fish bass http://images2.fanpop.com/images/photos/5700000/Largemouth-Bass-fishing-5708828-120-106.jpg 1 1 0 0 0 mammal bear http://icons.iconarchive.com/icons/iconshock/alaska/256/Polar-Bear-icon.png 1 1 0 0 0 mammal boar https://pbs.twimg.com/profile_images/2967299392/e0aa28ab427452deacc567a8f2816b3f.jpeg 0 0 1 1 0 fish carp http://www.landbigfish.com/images/fish/LBF_Common_Carp.jpg 0 1 1 1 0 fish catfish http://www.agfc.com/speciesPhotos/fish_catfish_yellowbullhead.jpg 0 0 0 0 1 bird chicken http://latimesblogs.latimes.com/.a/6a00d8341c630a53ef01156f75357a970c-400wi 1 0 0 0 0 mammal deer http://icons.iconarchive.com/icons/joseph-aeron/us-fish-and-wildlife-service/128/deer-icon.png 1 1 1 1 0 mammal dolphin http://f0.pepst.com/c/D7D076/36618/ssc3/home/023/deepakjain/albums/dolphin_14kb.jpg_480_480_0_64000_0_1_0.jpg 0 0 1 0 1 bird duck http://i.dailymail.co.uk/i/pix/2009/05/21/article-1185197-05075EEF000005DC-380_468x286.jpg 0 1 1 0 1 bird gull http://www.allaboutbirds.org/guide/PHOTO/LARGE/herring_gull_adult_breeding2.jpg 0 0 1 1 0 fish haddock http://www.oceantrawlers.com/sites/default/files/haddock_0.png?1317994124 0 0 0 0 0 mammal hamster http://img3.wikia.nocookie.net/__cb20130325185045/animalcrossing/images/4/49/Tumblr_lvrcmvCpsS1qbeyouo1_500.jpg 0 1 0 0 1 bird kiwi http://i.telegraph.co.uk/multimedia/archive/01891/kiwi_1891642c.jpg 1 1 1 0 0 mammal mink http://i.dailymail.co.uk/i/pix/2012/05/15/article-2144681-0D4F15C800000578-601_468x370.jpg \ No newline at end of file diff --git a/doc/visual-programming/source/widgets/evaluate/calibrationplot.md b/doc/visual-programming/source/widgets/evaluate/calibrationplot.md deleted file mode 100644 index 0ee74a379b3..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/calibrationplot.md +++ /dev/null @@ -1,55 +0,0 @@ -Calibration Plot -================ - -Shows the match between 
classifiers' probability predictions and actual class probabilities. - -**Inputs** - -- Evaluation Results: results of testing classification algorithms - -**Outputs** - -- Calibrated Model: a model with calibrated probabilities or a model with the same probabilities but different classification threshold - -The [Calibration Plot](https://en.wikipedia.org/wiki/Calibration_curve) plots probabilities predicted by the classifier(s) against actual class probabilities. We would use this widget to see whether a classifier is overly optimistic (gives predominantly positive results) or pessimistic (gives predominantly negative results). The widget can also output a calibrated model, where the user sets his/her own probability threshold. - -![](images/Calibration-Plot.png) - -1. Select the desired target class from the drop down menu. - - *Show rug*: If enabled, ticks are displayed at the bottom and the top of the graph, which represent negative and positive examples respectively. Their position corresponds to the classifier's probability prediction. Different colors represent different classifiers. At the bottom of the graph, the points to the left are those which are (correctly) assigned a low probability of the target class, and those to the right are incorrectly assigned high probabilities. At the top of the graph, the instances to the right are correctly assigned high probabilities and vice versa. - - Curves for individual folds: a curve is displayed for each fold from the [Test and Score](testandscore.md) widget. -2. Choose which classifiers to plot. Colors in the list of classifiers correspond to colors used in the plot. The diagonal represents optimal behavior when *Calibration curve* is selected; the closer the classifier's curve gets, the more accurate its prediction probabilities are. -3. Select the metric to calibrate: - - *calibration curve*: displays calibration curves for multiple models. 
The options for smoothing functions are [Sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function) or [Isotonic](https://en.wikipedia.org/wiki/Isotonic_regression) function. - - *classification accuracy*: displays classification accuracy at changing probability thresholds. Threshold can be set by dragging the vertical line left or right. - - *F1*: displays F1 score at changing probability thresholds. Threshold can be set by dragging the vertical line left or right. - - *sensitivity and specificity*: displays the relationship between [sensitivity and specificity](https://en.wikipedia.org/wiki/Sensitivity_and_specificity) at changing probability thresholds. - - *precision and recall*: displays the relationship between [precision and recall](https://en.wikipedia.org/wiki/Precision_and_recall) at changing probability thresholds. - - *pos and neg predictive value*: displays the relationship between [positive and negative predictive values](https://en.wikipedia.org/wiki/Positive_and_negative_predictive_values) at changing probability thresholds. - - *true and false positive rate*: displays the relationship between [TP and FP rate](https://en.wikipedia.org/wiki/False_positives_and_false_negatives) at changing probability thresholds. -4. If *Apply Automatically* is ticked, changes are communicated automatically. Alternatively, click *Apply*. - -When the widget shows the calibration curve, it outputs a calibrated model, whose predicted probabilities are tuned to better match the actual probabilities. - -When showing other curves (such as F1), the widget outputs a model that gives the same probabilities, but class predictions are made at thresholds different from 0.5. For instance, if we drag the vertical line shown in the graph to 0.3, the widget outputs a model that predicts a positive class when its probability exceeds 30%.
- -The widget cannot output a model if the input data contains models obtained from multiple runs (for instance from cross validation or repeated sampling). If multiple models are on the input, only a single one must be chosen in order to have it on the output. The widget also cannot output calibrated model for non-binary classes. - -Examples --------- - -At the moment, only two widgets give the signal of the correct type for **Calibration Plot**: [Test and Score](../evaluate/testandscore.md) and [Predictions](../evaluate/predictions.md). The Calibration Plot will always follow one of them. - -Here is a typical example on the iris data, where we compare two classifiers (namely [Logistic Regression](../model/logisticregression.md) and [Random Forest](../model/randomforest.md)) and input them into [Test and Score](../evaluate/testandscore.md). Test and Score displays evaluation results for each classifier. Then we draw **Calibration Plot** to further analyze the performance of the classifiers. **Calibration Plot** enables you to see prediction accuracy of class probabilities in a plot. - -Judging by the observed curve, the classifier is overly "cautious". Even when it predicts probabilities of around 0.4, the actual class probability is still 0. Conversely, when the classifier is only 0.6 certain that the class is positive, the actual probability of positive class is already almost 1. - -The widget is set to optimize F1 score. The user can drag the vertical black line left or right to set the probability threshold for the selected target value. The information on the calibrated classifier is displayed in the info box on the left. - -![](images/Calibration-Plot-Example1.png) - -In the second example, we show how to use the widget to output a calibrated model. We use [Data Sampler](../data/datasampler.md) to split the data into training and test subsets.
We pass both the training and test subsets to **Test and Score** and train a [Logistic Regression](../model/logisticregression.md) model, which we pass to **Calibration Plot**. Note that only a single calibrated model can be on the output, hence the user must select a single model from the classifier list. - -Once the model is calibrated, we can pass it to [Predictions](../evaluate/predictions.md) and use it on training data. - -![](images/Calibration-Plot-Example2.png) diff --git a/doc/visual-programming/source/widgets/evaluate/confusionmatrix.md b/doc/visual-programming/source/widgets/evaluate/confusionmatrix.md deleted file mode 100644 index 873ff427e36..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/confusionmatrix.md +++ /dev/null @@ -1,50 +0,0 @@ -Confusion Matrix -================ - -Shows proportions between the predicted and actual class. - -**Inputs** - -- Evaluation results: results of testing classification algorithms - -**Outputs** - -- Selected Data: data subset selected from confusion matrix -- Data: data with the additional information on whether a data instance was selected - -The [Confusion Matrix](https://en.wikipedia.org/wiki/Confusion_matrix) gives the number/proportion of instances between the predicted and actual class. The selection of the elements in the matrix feeds the corresponding instances into the output signal. This way, one can observe which specific instances were misclassified and how. - -The widget usually gets the evaluation results from [Test & Score](../evaluate/testandscore.md); an example of the schema is shown below. - -![](images/ConfusionMatrix-stamped.png) - -1. When evaluation results contain data on multiple learning algorithms, we have to choose one in the *Learners* box. - The snapshot shows the confusion matrix for [Tree](../model/tree.md) and [Naive Bayesian](../model/naivebayes.md) models trained and tested on the *iris* data. 
The right-hand side of the widget contains the matrix for the naive Bayesian model (since this model is selected on the left). Each row corresponds to a correct class, while columns represent the predicted classes. For instance, four instances of *Iris-versicolor* were misclassified as *Iris-virginica*. The rightmost column gives the number of instances from each class (there are 50 irises of each of the three classes) and the bottom row gives the number of instances classified into each class (e.g., 48 instances were classified into virginica). -2. In *Show*, we select what data we would like to see in the matrix. - - **Number of instances** shows correctly and incorrectly classified instances numerically. - - **Proportions of predicted** shows how many instances classified as, say, *Iris-versicolor* are in which true class; in the table we can read that 0% of them are actually setosae, 88.5% of those classified as versicolor are versicolors, and 7.7% are virginicae. - - **Proportions of actual** shows the opposite relation: of all true versicolors, 92% were classified as versicolors and 8% as virginicae. - ![](images/ConfusionMatrix-propTrue.png) -3. In *Select*, you can choose the desired output. - - **Correct** sends all correctly classified instances to the output by selecting the diagonal of the matrix. - - **Misclassified** selects the misclassified instances. - - **None** annuls the selection. - As mentioned before, one can also select individual cells of the table to select specific kinds of misclassified instances (e.g. the versicolors classified as virginicae). -4. When sending selected instances, the widget can add new attributes, such as predicted classes or their probabilities, if the corresponding options *Predictions* and/or *Probabilities* are checked. -5. The widget outputs every change if *Send Automatically* is ticked. If not, the user will need to click *Send Selected* to commit the changes. -6. Produce a report.
- -Example -------- - -The following workflow demonstrates what this widget can be used for. - -![](images/ConfusionMatrix-Schema.png) - -[Test & Score](../evaluate/testandscore.md) gets the data from [File](../data/file.md) and two learning algorithms from [Naive Bayes](../model/naivebayes.md) and [Tree](../model/tree.md). It performs cross-validation or some other train-and-test procedures to get class predictions by both algorithms for all (or some) data instances. The test results are fed into the **Confusion Matrix**, where we can observe how many instances were misclassified and in which way. - -In the output, we used [Data Table](../data/datatable.md) to show the instances we selected in the confusion matrix. If we, for instance, click *Misclassified*, the table will contain all instances which were misclassified by the selected method. - -The [Scatter Plot](../visualize/scatterplot.md) gets two sets of data. From the [File](../data/file.md) widget it gets the complete data, while the confusion matrix sends only the selected data, misclassifications for instance. The scatter plot will show all the data, with bold symbols representing the selected data. 
- -![](images/ConfusionMatrix-Example.png) diff --git a/doc/visual-programming/source/widgets/evaluate/icons/calibration-plot.png b/doc/visual-programming/source/widgets/evaluate/icons/calibration-plot.png deleted file mode 100644 index bbce5c62831..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/icons/calibration-plot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/icons/confusion-matrix.png b/doc/visual-programming/source/widgets/evaluate/icons/confusion-matrix.png deleted file mode 100644 index ace561e5046..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/icons/confusion-matrix.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/icons/lift-curve.png b/doc/visual-programming/source/widgets/evaluate/icons/lift-curve.png deleted file mode 100644 index 134fc149be0..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/icons/lift-curve.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/icons/predictions.png b/doc/visual-programming/source/widgets/evaluate/icons/predictions.png deleted file mode 100644 index 3ef99a4756a..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/icons/predictions.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/icons/roc-analysis.png b/doc/visual-programming/source/widgets/evaluate/icons/roc-analysis.png deleted file mode 100644 index bf1af55fdd5..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/icons/roc-analysis.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/icons/test-and-score.png b/doc/visual-programming/source/widgets/evaluate/icons/test-and-score.png deleted file mode 100644 index 088553170e8..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/icons/test-and-score.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot-Example1.png b/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot-Example1.png deleted file mode 100644 index dace0fe2480..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot-Example2.png b/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot-Example2.png deleted file mode 100644 index 204d2d1938f..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot.png b/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot.png deleted file mode 100644 index b3f0d711ab1..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Calibration-Plot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-Example.png b/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-Example.png deleted file mode 100644 index 2f6515d11bf..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-Schema.png b/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-Schema.png deleted file mode 100644 index aa6b412397e..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-Schema.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-propTrue.png b/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-propTrue.png deleted file mode 100644 index 8f4edffcf82..00000000000 Binary files 
a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-propTrue.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-stamped.png b/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-stamped.png deleted file mode 100644 index c4731323804..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ConfusionMatrix-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ParameterFitter.png b/doc/visual-programming/source/widgets/evaluate/images/ParameterFitter.png deleted file mode 100644 index 94505714fea..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ParameterFitter.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve-Example1.png b/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve-Example1.png deleted file mode 100644 index ac6cf4548ef..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve-Example2.png b/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve-Example2.png deleted file mode 100644 index aaf5ee3943a..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve.png b/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve.png deleted file mode 100644 index b912343d138..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/PerformanceCurve.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/Permutation-Plot-example.png 
b/doc/visual-programming/source/widgets/evaluate/images/Permutation-Plot-example.png deleted file mode 100644 index b00b20623c9..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Permutation-Plot-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/Permutation-Plot-stamped.png b/doc/visual-programming/source/widgets/evaluate/images/Permutation-Plot-stamped.png deleted file mode 100644 index 05f17cd7d45..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Permutation-Plot-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/Predictions-Example1.png b/doc/visual-programming/source/widgets/evaluate/images/Predictions-Example1.png deleted file mode 100644 index 55318051171..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Predictions-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/Predictions-Example2.png b/doc/visual-programming/source/widgets/evaluate/images/Predictions-Example2.png deleted file mode 100644 index 6626a3d5b48..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Predictions-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/Predictions-stamped.png b/doc/visual-programming/source/widgets/evaluate/images/Predictions-stamped.png deleted file mode 100644 index 318a1ed5e65..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/Predictions-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ROC-Comparison.png b/doc/visual-programming/source/widgets/evaluate/images/ROC-Comparison.png deleted file mode 100644 index bfcfc7189fc..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ROC-Comparison.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-AUC.png b/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-AUC.png deleted file mode 100644 index 0038b0f56a2..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-AUC.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-Plain.png b/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-Plain.png deleted file mode 100644 index 7bf29e84d4a..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-Plain.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-basic-stamped.png b/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-basic-stamped.png deleted file mode 100644 index 528768b98cc..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-basic-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-basic.png b/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-basic.png deleted file mode 100644 index 483cd9700fb..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-basic.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-example.png b/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-example.png deleted file mode 100644 index da5daa7c835..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis.png b/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis.png deleted file mode 100644 index 551f8302943..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/ROCAnalysis.png and /dev/null 
differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Classification.png b/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Classification.png deleted file mode 100644 index 5e7b89c603e..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Example.png b/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Example.png deleted file mode 100644 index 41de29d9d5a..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Regression.png b/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Regression.png deleted file mode 100644 index 11e86731584..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-Regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-stamped.png b/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-stamped.png deleted file mode 100644 index b6859931393..00000000000 Binary files a/doc/visual-programming/source/widgets/evaluate/images/TestAndScore-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/evaluate/parameterfitter.md b/doc/visual-programming/source/widgets/evaluate/parameterfitter.md deleted file mode 100644 index cef2c0c6f24..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/parameterfitter.md +++ /dev/null @@ -1,18 +0,0 @@ -Parameter Fitter -================ - -Find the best hyper-parameters for a model. - -**Inputs** - -- Data: input data -- Learner: learning algorithm - -Parameter fitter shows performance of a learning algorithms with different settings of a hyper-parameter. 
The widget is currently limited to a single integer parameter. Not all learning algorithms support hyper-parameter tuning. - -![](images/ParameterFitter.png) - -1. Choose the parameter to fit. -2. Define the lower and the upper limit; step size is determined automatically. -3. Alternatively, specify the values for the parameter. The widget also accepts `...`, e.g. `1, 2, 3, ..., 10` or `40, 60, ..., 100`. When the parameter has a minimal value (e.g. the number of components cannot be negative), one can also omit the lower bound, e.g. `..., 80, 100`; and if the parameter has a maximal value, one can omit the upper bound, e.g. `2, 4, 6, ...,`. -4. A plot showing the performance at different values of the parameter. The graph shows AUC for classification problems and R2 for regression. \ No newline at end of file diff --git a/doc/visual-programming/source/widgets/evaluate/performancecurve.md b/doc/visual-programming/source/widgets/evaluate/performancecurve.md deleted file mode 100644 index 12ba98cc131..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/performancecurve.md +++ /dev/null @@ -1,45 +0,0 @@ -Performance Curve -================= - -Construct and display a performance curve from the evaluation of classifiers. - -**Inputs** - -- Evaluation Results: results of testing classification algorithms - -**Outputs** - -- Calibrated Model: trained model using the calibrated learner - -The **Performance Curve** shows the curves for analysing the proportion of true positive data instances in relation to the classifier's threshold or the number of instances that we classify as positive. It offers three types of performance curves: [lift curve](https://en.wikipedia.org/wiki/Lift_(data_mining)), [cumulative gains](http://mlwiki.org/index.php/Cumulative_Gain_Chart), and [precision-recall curve](https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html).
- -**Lift curve** shows the ratio between lift (the proportion of true positive instances to all positive instances in the prediction) and the proportion of positive instances. The higher the initial curve and the longer it is flat, the better the model. See [a tutorial for more details](https://medium.com/analytics-vidhya/understanding-lift-curve-b674d21e426). - -**Cumulative gains** chart shows the ratio of true positive instances (for example, people with heart disease) and support, which is the fraction of positively predicted instances (the ratio of patients with a heart disease in the prediction), assuming that the instances are ordered according to the model's probability of being positive (e.g. how likely the person has the disease). The greater the area between the curve and the baseline (dashed diagonal line), the better the model. - -**Precision-recall curve** shows the ratio between precision (ratio of true positives in positive predictions) and recall (ratio of true positives in positive class) at different thresholds. Ideally one aims at a high area under the curve. - -![](images/PerformanceCurve.png) - -1. Choose the desired *Target class*. The default is chosen alphabetically. Choose whether to observe lift curve, cumulative gains or precision-recall. -2. If test results contain more than one classifier, the user can choose which curves she or he wants to see plotted. Click on a classifier to select or deselect the curve. -3. *Show thresholds* plots a vertical dashed threshold line. The line represents the probability threshold at which the prediction is considered positive. The line can be dragged left or right to change the threshold. *Show points* shows individual predictions as points on a plot. This option shows how many points were found at each value of x. -4. If *Apply Automatically* is ticked, changes are communicated automatically. Alternatively, click *Apply*. -5. A plot with the performance curve.
The vertical dashed line represents the probability threshold and can be moved interactively. The diagonal dashed line in **cumulative gains** represents a baseline classifier. - -Examples --------- - -The widgets that provide the right type of the signal needed by the **Performance Curve** (evaluation data) are [Test and Score](../evaluate/testandscore.md) and [Predictions](../evaluate/predictions.md). - -In the first example, we observe the lift curve and cumulative gain for the *iris* data, where the classification goal is to predict the type of iris based on the measurements of the flower. We run [Logistic Regression](../model/logisticregression.md) and [Random Forest](../model/randomforest.md) in the [Test and Score](../evaluate/testandscore.md) widget and send the results to **Performance Curve** to see their performance against a random model. Of the two algorithms tested, logistic regression outperforms the random forest. The curve tells us that by picking the first 34% of irises as ranked by the model and setting the probability threshold at 0.276, we are going to retain a perfect lift. - -![](images/PerformanceCurve-Example1.png) - -In the second example, we show how to calibrate a model in the **Performance Curve** widget. We are using the *heart-disease* data. First, the widget requires a single model on the input. This means cross-validation from Test and Score won't work, because it produces as many models as there are folds. To pass a single model, use the *Test on test data* option. - -In Performance Curve, we then observe the curve for the positive (1) class. The model has the optimal balance between precision and recall at the probability threshold of 0.475. We select this threshold and the model with the given threshold is sent to the output. - -We can use this model in [Predictions](../evaluate/predictions.md) to predict on new data with the calibrated model. See also [Calibrated Learner](../model/calibratedlearner.md) for more calibration options.
- -![](images/PerformanceCurve-Example2.png) diff --git a/doc/visual-programming/source/widgets/evaluate/permutationplot.md b/doc/visual-programming/source/widgets/evaluate/permutationplot.md deleted file mode 100644 index 755b1574df6..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/permutationplot.md +++ /dev/null @@ -1,25 +0,0 @@ -Permutation Plot -================ - -Check the validity and the degree of overfit for the input learner. - -**Inputs** - -- Data: input dataset -- Learner: learning algorithm - -![](images/Permutation-Plot-stamped.png) - -1. Select the number of permutations. The target variable is randomly permuted and the learner is fitted to each permuted dataset. -2. Information on the model performance. -3. Get help, save the plot, make the report, set plot properties. -4. Observe the size and type of inputs. - -The Permutation plot displays the Spearman's rank correlation coefficient between the permuted and original target variable on the x-axis versus the model score (R2 for regression, AUC for classification) on the y-axis. Two sets of points are shown, one for evaluations on the training data and one for cross-validation. A regression line is fitted to each set of points. The intercept is a measure of the overfit. - -Examples --------- - -Here is an example on the housing data, where we analyze the performance of a [Random Forest](../model/randomforest.md) model. - -![](images/Permutation-Plot-example.png) diff --git a/doc/visual-programming/source/widgets/evaluate/predictions.md b/doc/visual-programming/source/widgets/evaluate/predictions.md deleted file mode 100644 index f23240e9c47..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/predictions.md +++ /dev/null @@ -1,51 +0,0 @@ -Predictions -=========== - -Shows models' predictions on the data. 
- -**Inputs** - -- Data: input dataset -- Predictors: predictors to be used on the data - -**Outputs** - -- Predictions: data with added predictions -- Evaluation Results: results of testing classification algorithms - -The widget receives a dataset and one or more predictors (predictive models, not learning algorithms - see the example below). It outputs the data and the predictions. - -![](images/Predictions-stamped.png) - -1. Information on the input, namely the number of instances to predict, the number of predictors and the task (classification or regression). If you have sorted the data table by attribute and you wish to see the original view, press *Restore Original Order*. -2. You can select the options for classification. If *Predicted class* is ticked, the view provides information on predicted class. If *Predicted probabilities for* is ticked, the view provides information on probabilities predicted by the classifier(s). You can also select the predicted class displayed in the view. The option *Draw distribution bars* provides a visualization of probabilities. -3. By ticking the *Show full dataset*, you can view the entire data table (otherwise only class variable will be shown). -4. Select the desired output. -5. Predictions. - -The widget shows the probabilities and final decisions of [predictive models](https://en.wikipedia.org/wiki/Predictive_modelling). The output of the widget is another dataset, where predictions are appended as new meta attributes. You can select which features you wish to output (original data, predictions, probabilities). The result can be observed in a [Data Table](../data/datatable.md). If the predicted data includes true class values, the result of prediction can also be observed in a [Confusion Matrix](../evaluate/confusionmatrix.md). - -Examples --------- - -In the first example, we will use *Attrition - Train* data from the [Datasets](../data/datasets.md) widget. This is data on attrition of employees.
In other words, we wish to know whether a certain employee will resign from the job or not. We will construct a predictive model with the [Tree](../model/tree.md) widget and observe probabilities in **Predictions**. - -For predictions we need both the training data, which we have loaded in the first **Datasets** widget and the data to predict, which we will load in another [Datasets](../data/datasets.md) widget. We will use *Attrition - Predict* data this time. Connect the second data set to **Predictions**. Now we can see predictions for the three data instances from the second data set. - -The [Tree](../model/tree.md) model predicts none of the employees will leave the company. You can try other models and see if predictions change. Or test the predictive scores first in the [Test & Score](../evaluate/testandscore.md) widget. - -![](images/Predictions-Example1.png) - -In the second example, we will see how to properly use [Preprocess](../data/preprocess.md) with **Predictions** or [Test & Score](../evaluate/testandscore.md). - -This time we are using the *heart disease.tab* data from the [File](../data/file.md) widget. You can access the data through the dropdown menu. This is a dataset with 303 patients that came to the doctor suffering from a chest pain. After the tests were done, some patients were found to have diameter narrowing and others did not (this is our class variable). - -The heart disease data have some missing values and we wish to account for that. First, we will split the data set into train and test data with [Data Sampler](../data/datasampler.md). - -Then we will send the *Data Sample* into [Preprocess](../data/preprocess.md). We will use *Impute Missing Values*, but you can try any combination of preprocessors on your data. We will send preprocessed data to [Logistic Regression](../model/logisticregression.md) and the constructed model to **Predictions**. - -Finally, **Predictions** also needs the data to predict on.
We will use the output of [Data Sampler](../data/datasampler.md) for prediction, but this time not the *Data Sample*, but the *Remaining Data*, that is, the data that wasn't used for training the model. - -Notice how we send the remaining data directly to **Predictions** without applying any preprocessing. This is because Orange handles preprocessing on new data internally to prevent any errors in the model construction. The exact same preprocessor that was used on the training data will be used for predictions. The same process applies to [Test & Score](../evaluate/testandscore.md). - -![](images/Predictions-Example2.png) diff --git a/doc/visual-programming/source/widgets/evaluate/rocanalysis.md b/doc/visual-programming/source/widgets/evaluate/rocanalysis.md deleted file mode 100644 index f2beb8081a5..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/rocanalysis.md +++ /dev/null @@ -1,51 +0,0 @@ -ROC Analysis -============ - -Plots a true positive rate against a false positive rate of a test. - -**Inputs** - -- Evaluation Results: results of testing classification algorithms - -The widget shows ROC curves for the tested models and the corresponding convex hull. It serves as a means of comparison between classification models. The curve plots a false positive rate on an x-axis (1-specificity; probability that target=1 when true value=0) against a true positive rate on a y-axis (sensitivity; probability that target=1 when true value=1). The closer the curve follows the left-hand border and then the top border of the ROC space, the more accurate the classifier. Given the costs of false positives and false negatives, the widget can also determine the optimal classifier and threshold. - -![](images/ROCAnalysis-basic-stamped.png) - -1. Choose the desired *Target Class*. The default class is chosen alphabetically. -2. If test results contain more than one classifier, the user can choose which curves she or he wants to see plotted.
Click on a classifier to select or deselect it. -3. When the data comes from multiple iterations of training and testing, such as k-fold cross validation, the results can be (and usually are) averaged. - ![](images/ROC-Comparison.png) - The averaging options are: - - **Merge predictions from folds** (top left), which treats all the test data as if they came from a single iteration - - **Mean TP rate** (top right) averages the curves vertically, showing the corresponding confidence intervals - - **Mean TP and FP at threshold** (bottom left) traverses over threshold, averages the positions of curves and shows horizontal and vertical confidence intervals - - **Show individual curves** (bottom right) does not average but prints all the curves instead -4. Option *Show convex ROC curves* refers to convex curves over each individual classifier (the thin lines positioned over curves). *Show ROC convex hull* plots a convex hull combining all classifiers (the gray area below the curves). Plotting both types of convex curves makes sense since selecting a threshold in a concave part of the curve cannot yield optimal results, disregarding the cost matrix. Besides, it is possible to reach any point on the convex curve by combining the classifiers represented by the points on the border of the concave region. - ![](images/ROCAnalysis-AUC.png) -The diagonal dotted line represents the behavior of a random classifier. The full diagonal line represents iso-performance. A black "*A*" symbol at the bottom of the graph proportionally readjusts the graph. -5. The final box is dedicated to the analysis of the curve. The user can specify the cost of false positives (FP) and false negatives (FN), and the prior target class probability. - - - *Default threshold (0.5) point* shows the point on the ROC curve achieved by the classifier if it predicts the target class if its probability equals or exceeds 0.5. 
- - *Show performance line* shows iso-performance in the ROC space so that all the points on the line give the same profit/loss. The line further to the upper left is better than the one down and right. The direction of the line depends upon costs and probabilities. This gives a recipe for depicting the optimal threshold for the given costs: this is the point where the tangent with the given inclination touches the curve and it is marked in the plot. If we push the iso-performance higher or more to the left, the points on the iso-performance line cannot be reached by the learner. Going down or to the right, decreases the performance. - - The widget allows setting the costs from 1 to 1000. Units are not important, as are not the magnitudes. What matters is the relation between the two costs, so setting them to 100 and 200 will give the same result as 400 and 800. - Defaults: both costs equal (500), Prior target class probability 50%(from the data). - ![](images/ROCAnalysis-Plain.png) - False positive cost: 830, False negative cost 650, Prior target - class probability 73%. - ![](images/ROCAnalysis.png) -6. Press *Save Image* if you want to save the created image to your - computer in a .svg or .png format. -7. Produce a report. - -The widget will output a model with a new operating threshold if - -- input results come from testing on a single data set and not in some type of cross validation (which produces multiple potentially different models), -- only a single curve is shown, -- and the widget shows the performance line, which indicates the new operating threshold - the one that corresponds to the point at which the line touches the curve. - -Example -------- - -At the moment, the only widget which gives the right type of signal needed by the **ROC Analysis** is [Test & Score](../evaluate/testandscore.md). 
Below, we compare two classifiers, namely [Tree](../model/tree.md) and [Naive Bayes](../model/naivebayes.md), in **Test\&Score** and then compare their performance in **ROC Analysis**, [Lift Curve](../evaluate/performancecurve.md) and [Calibration Plot](../evaluate/calibrationplot.md). - -![](images/ROCAnalysis-example.png) diff --git a/doc/visual-programming/source/widgets/evaluate/testandscore.md b/doc/visual-programming/source/widgets/evaluate/testandscore.md deleted file mode 100644 index d33a92afc76..00000000000 --- a/doc/visual-programming/source/widgets/evaluate/testandscore.md +++ /dev/null @@ -1,82 +0,0 @@ -Test and Score -============== - -Tests learning algorithms on data. - -**Inputs** - -- Data: input dataset -- Test Data: separate data for testing -- Learner: learning algorithm(s) - -**Outputs** - -- Evaluation Results: results of testing classification algorithms - -The widget tests learning algorithms. Different sampling schemes are available, including using separate test data. The widget does two things. First, it shows a table with different classifier performance measures, such as [classification accuracy](https://en.wikipedia.org/wiki/Accuracy_and_precision) and [area under the curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve). Second, it outputs evaluation results, which can be used by other widgets for analyzing the performance of classifiers, such as [ROC Analysis](../evaluate/rocanalysis.md) or [Confusion Matrix](../evaluate/confusionmatrix.md). - -The *Learner* signal has an uncommon property: it can be connected to more than one widget to test multiple learners with the same procedures. - -![](images/TestAndScore-stamped.png) - -1. The widget supports various sampling methods. - - [Cross-validation](https://en.wikipedia.org/wiki/Cross-validation_\(statistics\)) splits the data into a given number of folds (usually 5 or 10).
The algorithm is tested by holding out examples from one fold at a time; the model is induced from other folds and examples from the held out fold are classified. This is repeated for all the folds. - - **Cross validation by feature** performs cross-validation but folds are defined by the selected categorical feature from meta-features. - - **Random sampling** randomly splits the data into the training and testing set in the given proportion (e.g. 70:30); the whole procedure is repeated for a specified number of times. - - **Leave-one-out** is similar, but it holds out one instance at a time, inducing the model from all others and then classifying the held out instances. This method is obviously very stable, reliable... and very slow. - - **Test on train data** uses the whole dataset for training and then for testing. This method practically always gives wrong results. - - **Test on test data**: the above methods use the data from *Data* signal only. To input another dataset with testing examples (for instance from another file or some data selected in another widget), we select *Separate Test Data* signal in the communication channel and select Test on test data. -2. For classification, *Target class* can be selected at the bottom of the widget. When *Target class* is (Average over classes), methods return scores that are weighted averages over all classes. For example, in case of the classifier with 3 classes, scores are computed for class 1 as a target class, class 2 as a target class, and class 3 as a target class. Those scores are averaged with weights based on the class size to retrieve the final score. -3. The widget will compute a number of performance statistics. A few are shown by default. To see others, right-click on the header and select the desired statistic. - - Classification - ![](images/TestAndScore-Classification.png) - - [Area under ROC](http://gim.unmc.edu/dxtests/roc3.htm) is the area under the receiver-operating curve. 
- - [Classification accuracy](https://en.wikipedia.org/wiki/Accuracy_and_precision) is the proportion of correctly classified examples. - - [F-1](https://en.wikipedia.org/wiki/F1_score) is a weighted harmonic mean of precision and recall (see below). - - [Precision](https://en.wikipedia.org/wiki/Precision_and_recall) is the proportion of true positives among instances classified as positive, e.g. the proportion of *Iris virginica* correctly identified as Iris virginica. - - [Recall](https://en.wikipedia.org/wiki/Precision_and_recall) is the proportion of true positives among all positive instances in the data, e.g. the number of sick among all diagnosed as sick. - - [Specificity](https://en.wikipedia.org/wiki/Sensitivity_and_specificity) is the proportion of true negatives among all negative instances, e.g. the number of non-sick among all diagnosed as non-sick. - - [LogLoss](https://en.wikipedia.org/wiki/Cross_entropy) or cross-entropy loss takes into account the uncertainty of your prediction based on how much it varies from the actual label. - - [Matthews correlation coefficient](https://en.wikipedia.org/wiki/Phi_coefficient) takes into account true and false positives and negatives and is generally regarded as a balanced measure which can be used even if the classes are of very different sizes. - - Train time - cumulative time in seconds used for training models. - - Test time - cumulative time in seconds used for testing models. - - Regression - ![](images/TestAndScore-Regression.png) - - [MSE](https://en.wikipedia.org/wiki/Mean_squared_error) measures the average of the squares of the errors or deviations (the difference between the estimator and what is estimated). - - [RMSE](https://en.wikipedia.org/wiki/Root_mean_square) is the square root of the arithmetic mean of the squares of a set of numbers (a measure of imperfection of the fit of the estimator to the data) - - [MAE]() is used to measure how close forecasts or predictions are to eventual outcomes. 
- - [R2]() is interpreted as the proportion of the variance in the dependent variable that is predictable from the independent variable. - - [CVRMSE](https://en.wikipedia.org/wiki/Root-mean-square_deviation) is RMSE normalized by the mean value of actual values. - - Train time - cumulative time in seconds used for training models. - - Test time - cumulative time in seconds used for testing models. -4. Choose the score for pairwise comparison of models and the region of practical equivalence (ROPE), in which differences are considered negligible. -5. Pairwise comparison of models using the selected score (available only for cross-validation). The number in the table gives the probability that the model corresponding to the row has a higher score than the model corresponding to the column. What the higher score means depends on the metric: a higher score can either mean a model is better (for example, CA or AUC) or the opposite (for example, RMSE). If negligible difference is enabled, the smaller number below shows the probability that the difference between the pair is negligible. The test is based on the [Bayesian interpretation of the t-test](https://link.springer.com/article/10.1007/s10994-015-5486-z) ([shorter introduction](https://baycomp.readthedocs.io/en/latest/introduction.html)). -6. Get help and produce a report. - -Preprocessing for predictive modeling --------------------------------------- - -When building predictive models, one has to be careful about how to preprocess the data. There are two possible ways to do it in Orange, each slightly different: - -1. Connect [Preprocess](../data/preprocess.md) to the learner. This will override the default preprocessing pipeline for the learner and apply only custom preprocessing pipeline (default preprocessing steps are described in each learner's documentation). The procedure might lead to errors within the learner. - - ![](../data/images/Preprocess-Models1.png) - -2. Connect **Preprocess** to Test and Score. 
This will apply the preprocessors to each batch within cross-validation. Then the learner's preprocessors will be applied to the preprocessed subset. - - ![](../data/images/Preprocess-Models2.png) - -Finally, there's a wrong way to do it. Connecting **Preprocess** directly to the original data and outputting preprocessed data set will likely overfit the model. Don't do it. - - ![](../data/images/Preprocess-Models3.png) - -Example -------- - -In a typical use of the widget, we give it a dataset and a few learning algorithms and we observe their performance in the table inside the **Test & Score** widget and in the [ROC](../evaluate/rocanalysis.md). The data is often preprocessed before testing; in this case we did some manual feature selection ([Select Columns](../data/selectcolumns.md) widget) on *Titanic* dataset, where we want to know only the sex and status of the survived and omit the age. - -In the bottom table, we have a pairwise comparison of models. We selected that comparison is based on the _area under ROC curve_ statistic. The number in the table gives the probability that the model corresponding to the row is better than the model corresponding to the column. We can, for example, see that probability for the tree to be better than SVM is almost one, and the probability that tree is better than Naive Bayes is 0.001. Smaller numbers in the table are probabilities that the difference between the pair is negligible based on the negligible threshold 0.1. - -![](images/TestAndScore-Example.png) - -Another example of using this widget is presented in the documentation for the [Confusion Matrix](../evaluate/confusionmatrix.md) widget. diff --git a/doc/visual-programming/source/widgets/mkdocs.yml b/doc/visual-programming/source/widgets/mkdocs.yml deleted file mode 100644 index 406c357d83e..00000000000 --- a/doc/visual-programming/source/widgets/mkdocs.yml +++ /dev/null @@ -1,2 +0,0 @@ -site_name: My Docs -docs_dir: . 
diff --git a/doc/visual-programming/source/widgets/model/adaboost.md b/doc/visual-programming/source/widgets/model/adaboost.md deleted file mode 100644 index 6e7c4e8189d..00000000000 --- a/doc/visual-programming/source/widgets/model/adaboost.md +++ /dev/null @@ -1,55 +0,0 @@ -AdaBoost -======== - -An ensemble meta-algorithm that combines weak learners and adapts to the 'hardness' of each training sample. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) -- Learner: learning algorithm - -**Outputs** - -- Learner: AdaBoost learning algorithm -- Model: trained model - -The [AdaBoost](https://en.wikipedia.org/wiki/AdaBoost) (short for "Adaptive boosting") widget is a machine-learning algorithm, formulated by [Yoav Freund and Robert Schapire](https://cseweb.ucsd.edu/~yfreund/papers/IntroToBoosting.pdf). It can be used with other learning algorithms to boost their performance. It does so by tweaking the weak learners. - -**AdaBoost** works for both classification and regression. - -![](images/AdaBoost-stamped.png) - -1. The learner can be given a name under which it will appear in other widgets. The default name is "AdaBoost". -2. Set the parameters. The base estimator is a tree and you can set: - - *Number of estimators* - - *Learning rate*: it determines to what extent the newly acquired information will override the old information (0 = the agent will not learn anything, 1 = the agent considers only the most recent information) - - *Fixed seed for random generator*: set a fixed seed to enable reproducing the results. -3. Boosting method. - - *Classification algorithm* (if classification on input): SAMME (updates base estimator's weights with classification results) or SAMME.R (updates base estimator's weight with probability estimates). - - *Regression loss function* (if regression on input): Linear (), Square (), Exponential (). -4. Produce a report. -5. Click *Apply* after changing the settings. 
That will put the new learner in the output and, if the training examples are given, construct a new model and output it as well. To communicate changes automatically tick *Apply Automatically*. - -Preprocessing -------------- - -AdaBoost uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Examples --------- - -For classification, we loaded the *iris* dataset. We used *AdaBoost*, [Tree](../model/tree.md) and [Logistic Regression](../model/logisticregression.md) and evaluated the models' performance in [Test & Score](../evaluate/testandscore.md). - -![](images/AdaBoost-classification.png) - -For regression, we loaded the *housing* dataset, sent the data instances to two different models (**AdaBoost** and [Tree](../model/tree.md)) and output them to the [Predictions](../evaluate/predictions.md) widget. - -![](images/AdaBoost-regression.png) diff --git a/doc/visual-programming/source/widgets/model/calibratedlearner.md b/doc/visual-programming/source/widgets/model/calibratedlearner.md deleted file mode 100644 index 4da3facc34e..00000000000 --- a/doc/visual-programming/source/widgets/model/calibratedlearner.md +++ /dev/null @@ -1,45 +0,0 @@ -Calibrated Learner -================== - -Wraps another learner with probability calibration and decision threshold optimization. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) -- Base Learner: learner to calibrate - -**Outputs** - -- Learner: calibrated learning algorithm -- Model: trained model using the calibrated learner - -This learner produces a model that calibrates the distribution of class probabilities and optimizes decision threshold. 
The widget works only for binary classification tasks. - -![](images/Calibrated-Learner-stamped.png) - -1. The name under which it will appear in other widgets. Default name is composed of the learner, calibration and optimization parameters. -2. Probability calibration: - - - [Sigmoid calibration](http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.41.1639) - - [Isotonic calibration](https://scikit-learn.org/stable/auto_examples/plot_isotonic_regression.html) - - No calibration - -3. Decision threshold optimization: - - - Optimize classification accuracy - - Optimize F1 score - - No threshold optimization - -4. Press *Apply* to commit changes. If *Apply Automatically* is ticked, changes are committed automatically. - -Example -------- - -A simple example with **Calibrated Learner**. We are using the *titanic* data set as the widget requires binary class values (in this case they are 'survived' and 'not survived'). - -We will use [Logistic Regression](logisticregression.md) as the base learner which we will calibrate with the default settings, that is with sigmoid optimization of distribution values and by optimizing the CA. - -Comparing the results with the uncalibrated **Logistic Regression** model we see that the calibrated model performs better. - -![](images/Calibrated-Learner-Example.png) diff --git a/doc/visual-programming/source/widgets/model/cn2ruleinduction.md b/doc/visual-programming/source/widgets/model/cn2ruleinduction.md deleted file mode 100644 index 3026db3eb45..00000000000 --- a/doc/visual-programming/source/widgets/model/cn2ruleinduction.md +++ /dev/null @@ -1,70 +0,0 @@ -CN2 Rule Induction -================== - -Induce rules from data using CN2 algorithm.
- -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: CN2 learning algorithm -- CN2 Rule Classifier: trained model - -The CN2 algorithm is a classification technique designed for the efficient induction of simple, comprehensible rules of form "if *cond* then predict *class*", even in domains where noise may be present. - -**CN2 Rule Induction** works only for classification. - -![](images/CN2-stamped.png) - -1. Name under which the learner appears in other widgets. The default name is *CN2 Rule Induction*. -2. *Rule ordering*: - - **Ordered**: induce ordered rules (decision list). Rule conditions are found and the majority class is assigned in the rule head. - - **Unordered**: induce unordered rules (rule set). Learn rules for each class individually, in regard to the original learning data. -3. *Covering algorithm*: - - **Exclusive**: after covering a learning instance, remove it from further consideration. - - **Weighted**: after covering a learning instance, decrease its weight (multiplication by *gamma*) and in-turn decrease its impact on further iterations of the algorithm. -4. *Rule search*: - - **Evaluation measure**: select a heuristic to evaluate found hypotheses: - - [Entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) (measure of unpredictability of content) - - [Laplace Accuracy](https://en.wikipedia.org/wiki/Laplace%27s_method) - - Weighted Relative Accuracy - - **Beam width**; remember the best rule found thus far and monitor a fixed number of alternatives (the beam). -5. *Rule filtering*: - - **Minimum rule coverage**: found rules must cover at least the minimum required number of covered examples. Unordered rules must cover this many target class examples. - - **Maximum rule length**: found rules may combine at most the maximum allowed number of selectors (conditions). 
- - **Default alpha**: significance testing to prune out most specialised (less frequently applicable) rules in regard to the initial distribution of classes. - - **Parent alpha**: significance testing to prune out most specialised (less frequently applicable) rules in regard to the parent class distribution. -6. Tick 'Apply Automatically' to auto-communicate changes to other widgets and to immediately train the classifier if learning data is connected. Alternatively, press 'Apply' after configuration. - -Preprocessing -------------- - -CN2 Rule Induction uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes empty columns -- removes instances with unknown target values -- imputes missing values with mean values - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Examples --------- - -For the example below, we have used the *zoo* dataset and passed it to **CN2 Rule Induction**. We can review and interpret the built model with the [CN2 Rule Viewer](../visualize/cn2ruleviewer.md) widget. - -![](images/CN2-visualize.png) - -The second workflow evaluates **CN2 Rule Induction** and [Tree](../model/tree.md) in [Test & Score](../evaluate/testandscore.md). - -![](images/CN2-classification.png) - -References ----------- - -1. Fürnkranz, Johannes. "Separate-and-Conquer Rule Learning", Artificial Intelligence Review 13, 3-54, 1999. -2. Clark, Peter and Tim Niblett. "The CN2 Induction Algorithm", Machine Learning Journal, 3 (4), 261-283, 1989. -3. Clark, Peter and Robin Boswell. "Rule Induction with CN2: Some Recent Improvements", Machine Learning - Proceedings of the 5th European Conference (EWSL-91), 151-163, 1991. -4. Lavrač, Nada et al. 
"Subgroup Discovery with CN2-SD", Journal of Machine Learning Research 5, 153-188, 2004 diff --git a/doc/visual-programming/source/widgets/model/constant.md b/doc/visual-programming/source/widgets/model/constant.md deleted file mode 100644 index c5d06c46440..00000000000 --- a/doc/visual-programming/source/widgets/model/constant.md +++ /dev/null @@ -1,47 +0,0 @@ -Constant -======== - -Predict the most frequent class or mean value from the training set. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: majority/mean learning algorithm -- Model: trained model - -This learner produces a model that always predicts the [majority](https://en.wikipedia.org/wiki/Predictive_modelling#Majority_classifier) for classification tasks and [mean value](https://en.wikipedia.org/wiki/Mean) for regression tasks. - -For classification, when predicting the class value with [Predictions](../evaluate/predictions.md), the widget will return relative frequencies of the classes in the training set. When there are two or more majority classes, the classifier chooses the predicted class randomly, but always returns the same class for a particular example. - -For regression, it *learns* the mean of the class variable and returns a predictor with the same mean value. - -The widget is typically used as a baseline for other models. - -![](images/Constant-stamped.png) - -This widget provides the user with two options: - -1. The name under which it will appear in other widgets. Default name is "Constant". -2. Produce a report. - -If you change the widget's name, you need to click *Apply*. Alternatively, tick the box on the left side and changes will be communicated automatically. - -Preprocessing -------------- - -Constant does not use any preprocessing. - -Examples --------- - -In a typical classification example, we would use this widget to compare the scores of other learning algorithms (such as kNN) with the default scores. 
Use *iris* dataset and connect it to [Test & Score](../evaluate/testandscore.md). Then connect **Constant** and [kNN](../model/knn.md) to [Test & Score](../evaluate/testandscore.md) and observe how well [kNN](../model/knn.md) performs against a constant baseline. - -![](images/Constant-classification.png) - -For regression, we use **Constant** to construct a predictor in [Predictions](../evaluate/predictions.md). We used the *housing* dataset. In **Predictions**, you can see that *Mean Learner* returns one (mean) value for all instances. - -![](images/Constant-regression.png) diff --git a/doc/visual-programming/source/widgets/model/curvefit.md b/doc/visual-programming/source/widgets/model/curvefit.md deleted file mode 100644 index 9ad4f6db763..00000000000 --- a/doc/visual-programming/source/widgets/model/curvefit.md +++ /dev/null @@ -1,49 +0,0 @@ -Curve Fit -========= - -Fit a function to data. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: curve fit learning algorithm -- Model: trained model -- Coefficients: fitted coefficients - -The **Curve Fit** widget fits an arbitrary function to the input data. It only works for regression tasks. -The widget uses [scipy.curve_fit](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html) to find the optimal values of the parameters. - -The widget works only on regression tasks and only numerical features can be used for fitting. - -![](images/CurveFit-stamped.png) - -1. The learner/predictor name. -2. Introduce model parameters. -3. Input an expression in Python. The expression should consist of at least one fitting parameter. -4. Select a feature to include into the expression. Only numerical features are available. -5. Select a parameter. Only the introduced parameters are available. -6. Select a function. -7. Press *Apply* to commit changes. If *Apply Automatically* is ticked, changes are committed automatically. -8. 
Show help, produce a report, input/output info. - -Preprocessing -------------- - -Curve fit uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes instances with unknown target values -- removes empty columns -- imputes missing values with mean values - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Example -------- - -Below is a simple workflow with the *housing* dataset. To keep the example simple, we used only a single feature. Unlike the other modelling widgets, the Curve Fit needs data on the input. We trained **Curve Fit** and [Linear Regression](../model/linearregression.md) and evaluated their performance in [Test & Score](../evaluate/testandscore.md). - -![](images/CurveFit-example.png) diff --git a/doc/visual-programming/source/widgets/model/gradientboosting.md b/doc/visual-programming/source/widgets/model/gradientboosting.md deleted file mode 100644 index a59caab4dcc..00000000000 --- a/doc/visual-programming/source/widgets/model/gradientboosting.md +++ /dev/null @@ -1,63 +0,0 @@ -Gradient Boosting -================= - -Predict using gradient boosting on decision trees. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: gradient boosting learning algorithm -- Model: trained model - -[Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) is a machine learning technique for regression and classification problems, which produces a prediction model in the form of an ensemble of weak prediction models, typically decision trees. - -![](images/GradientBoosting-stamped.png) - -1. Specify the name of the model. The default name is "Gradient Boosting". -2. 
Select a gradient boosting method: - - [Gradient Boosting (scikit-learn)](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html) - - [Extreme Gradient Boosting (xgboost)](https://xgboost.readthedocs.io/en/latest/index.html) - - [Extreme Gradient Boosting Random Forest (xgboost)](https://xgboost.readthedocs.io/en/latest/index.html) - - [Gradient Boosting (catboost)](https://catboost.ai/docs/concepts/python-quickstart.html) -3. Basic properties: - - *Number of trees*: Specify how many gradient boosted trees will be included. A large number usually results in better performance. - - *Learning rate*: Specify the boosting learning rate. Learning rate shrinks the contribution of each tree. - - *Replicable training*: Fix the random seed, which enables replicability of the results. - - *Regularization*: Specify the L2 regularization term. Available only for *xgboost* and *catboost* methods. -4. Growth control: - - *Limit depth of individual trees*: Specify the maximum depth of the individual tree. - - *Do not split subsets smaller than*: Specify the smallest subset that can be split. Available only for *scikit-learn* methods. -5. Subsampling: - - *Fraction of training instances*: Specify the percentage of the training instances for fitting the individual tree. Available for *scikit-learn* and *xgboost* methods. - - *Fraction of features for each tree*: Specify the percentage of features to use when constructing each tree. Available for *xgboost* and *catboost* methods. - - *Fraction of features for each level*: Specify the percentage of features to use for each level. Available only for *xgboost* methods. - - *Fraction of features for each split*: Specify the percentage of features to use for each split. Available only for *xgboost* methods. -6. Click *Apply* to communicate the changes to other widgets. Alternatively, tick the box on the left side of the *Apply* button and changes will be communicated automatically. 
- -Preprocessing -------------- - -Gradient Boosting uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Feature Scoring ---------------- - -Gradient Boosting can be used with Rank for feature scoring. See [Learners as Scorers](../../learners-as-scorers/index.md) for an example. - -Example -------- - -For a classification task, we use the *heart disease* data. Here, we compare all available methods in the [Test & Score](../evaluate/testandscore.md) widget. - -![](images/GradientBoosting-example.png) diff --git a/doc/visual-programming/source/widgets/model/icons/adaboost.png b/doc/visual-programming/source/widgets/model/icons/adaboost.png deleted file mode 100644 index 595fe39a188..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/adaboost.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/cn2ruleinduction.png b/doc/visual-programming/source/widgets/model/icons/cn2ruleinduction.png deleted file mode 100644 index 6e6657d9288..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/cn2ruleinduction.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/constant.png b/doc/visual-programming/source/widgets/model/icons/constant.png deleted file mode 100755 index 376cad654da..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/constant.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/knn.png b/doc/visual-programming/source/widgets/model/icons/knn.png deleted file mode 100644 index 0a85f615830..00000000000 Binary files 
a/doc/visual-programming/source/widgets/model/icons/knn.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/linear-regression.png b/doc/visual-programming/source/widgets/model/icons/linear-regression.png deleted file mode 100755 index c0eb5bada86..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/linear-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/load-model.png b/doc/visual-programming/source/widgets/model/icons/load-model.png deleted file mode 100644 index 0efb11a9676..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/load-model.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/logistic-regression.png b/doc/visual-programming/source/widgets/model/icons/logistic-regression.png deleted file mode 100644 index 4aaa167d3cb..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/logistic-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/naive-bayes.png b/doc/visual-programming/source/widgets/model/icons/naive-bayes.png deleted file mode 100644 index bfd77fc501f..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/naive-bayes.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/neural-network.png b/doc/visual-programming/source/widgets/model/icons/neural-network.png deleted file mode 100755 index 124c82b1c3e..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/neural-network.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/random-forest.png b/doc/visual-programming/source/widgets/model/icons/random-forest.png deleted file mode 100644 index ee187a5e895..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/random-forest.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/model/icons/save-model.png b/doc/visual-programming/source/widgets/model/icons/save-model.png deleted file mode 100644 index 28e87b882e8..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/save-model.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/stacking.png b/doc/visual-programming/source/widgets/model/icons/stacking.png deleted file mode 100644 index 565d7045f91..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/stacking.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/stochastic-gradient.png b/doc/visual-programming/source/widgets/model/icons/stochastic-gradient.png deleted file mode 100755 index f35f639f5e2..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/stochastic-gradient.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/svm.png b/doc/visual-programming/source/widgets/model/icons/svm.png deleted file mode 100644 index 44b72f4b284..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/svm.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/icons/tree.png b/doc/visual-programming/source/widgets/model/icons/tree.png deleted file mode 100644 index 82315cf0371..00000000000 Binary files a/doc/visual-programming/source/widgets/model/icons/tree.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/AdaBoost-classification.png b/doc/visual-programming/source/widgets/model/images/AdaBoost-classification.png deleted file mode 100644 index 7e516255edd..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/AdaBoost-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/AdaBoost-regression.png b/doc/visual-programming/source/widgets/model/images/AdaBoost-regression.png deleted 
file mode 100644 index ffef8d3e41e..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/AdaBoost-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/AdaBoost-stamped.png b/doc/visual-programming/source/widgets/model/images/AdaBoost-stamped.png deleted file mode 100644 index 7f02a1eb73f..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/AdaBoost-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/CN2-classification.png b/doc/visual-programming/source/widgets/model/images/CN2-classification.png deleted file mode 100644 index b0c95e61c7e..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/CN2-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/CN2-stamped.png b/doc/visual-programming/source/widgets/model/images/CN2-stamped.png deleted file mode 100644 index 43cc1637271..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/CN2-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/CN2-visualize.png b/doc/visual-programming/source/widgets/model/images/CN2-visualize.png deleted file mode 100644 index cdfe1cf7c84..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/CN2-visualize.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Calibrated-Learner-Example.png b/doc/visual-programming/source/widgets/model/images/Calibrated-Learner-Example.png deleted file mode 100644 index 169b92ddd62..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Calibrated-Learner-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Calibrated-Learner-stamped.png b/doc/visual-programming/source/widgets/model/images/Calibrated-Learner-stamped.png deleted file mode 100644 index 
a215105854d..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Calibrated-Learner-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Constant-classification.png b/doc/visual-programming/source/widgets/model/images/Constant-classification.png deleted file mode 100644 index 0795f579329..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Constant-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Constant-regression.png b/doc/visual-programming/source/widgets/model/images/Constant-regression.png deleted file mode 100644 index aa4865e92e0..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Constant-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Constant-stamped.png b/doc/visual-programming/source/widgets/model/images/Constant-stamped.png deleted file mode 100644 index 281e9caf4d6..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Constant-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/CurveFit-example.png b/doc/visual-programming/source/widgets/model/images/CurveFit-example.png deleted file mode 100644 index 42e4efa51d3..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/CurveFit-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/CurveFit-stamped.png b/doc/visual-programming/source/widgets/model/images/CurveFit-stamped.png deleted file mode 100644 index 911f31ca9e0..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/CurveFit-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/GradientBoosting-example.png b/doc/visual-programming/source/widgets/model/images/GradientBoosting-example.png deleted file mode 100644 index 
7180940831b..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/GradientBoosting-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/GradientBoosting-stamped.png b/doc/visual-programming/source/widgets/model/images/GradientBoosting-stamped.png deleted file mode 100644 index 665438b240e..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/GradientBoosting-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Linear-Regression-coefficients.png b/doc/visual-programming/source/widgets/model/images/Linear-Regression-coefficients.png deleted file mode 100644 index b88eeb0883c..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Linear-Regression-coefficients.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Linear-Regression-workflow.png b/doc/visual-programming/source/widgets/model/images/Linear-Regression-workflow.png deleted file mode 100644 index 6794c966bdd..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Linear-Regression-workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Linear-Regression.png b/doc/visual-programming/source/widgets/model/images/Linear-Regression.png deleted file mode 100644 index 41d922d3639..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Linear-Regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/LinearRegression-regression.png b/doc/visual-programming/source/widgets/model/images/LinearRegression-regression.png deleted file mode 100644 index 3b84f327a6f..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/LinearRegression-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/LoadModel-example.png 
b/doc/visual-programming/source/widgets/model/images/LoadModel-example.png deleted file mode 100644 index 1ea03eae623..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/LoadModel-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/LoadModel-stamped.png b/doc/visual-programming/source/widgets/model/images/LoadModel-stamped.png deleted file mode 100644 index 463edb1b500..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/LoadModel-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/LogisticRegression-classification.png b/doc/visual-programming/source/widgets/model/images/LogisticRegression-classification.png deleted file mode 100644 index c172871d2ac..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/LogisticRegression-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/LogisticRegression-stamped.png b/doc/visual-programming/source/widgets/model/images/LogisticRegression-stamped.png deleted file mode 100644 index 39bc17a1691..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/LogisticRegression-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/NN-Example-Predict.png b/doc/visual-programming/source/widgets/model/images/NN-Example-Predict.png deleted file mode 100644 index 1b79f92740b..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/NN-Example-Predict.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/NN-Example-Test.png b/doc/visual-programming/source/widgets/model/images/NN-Example-Test.png deleted file mode 100644 index d1dfbf705d0..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/NN-Example-Test.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/model/images/NaiveBayes-classification.png b/doc/visual-programming/source/widgets/model/images/NaiveBayes-classification.png deleted file mode 100644 index 61328818346..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/NaiveBayes-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/NaiveBayes-stamped.png b/doc/visual-programming/source/widgets/model/images/NaiveBayes-stamped.png deleted file mode 100644 index a159f4d9e26..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/NaiveBayes-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/NaiveBayes-visualize.png b/doc/visual-programming/source/widgets/model/images/NaiveBayes-visualize.png deleted file mode 100644 index a09d18424ad..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/NaiveBayes-visualize.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/NeuralNetwork-stamped.png b/doc/visual-programming/source/widgets/model/images/NeuralNetwork-stamped.png deleted file mode 100644 index a5ec6e24620..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/NeuralNetwork-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/PLS-Example.png b/doc/visual-programming/source/widgets/model/images/PLS-Example.png deleted file mode 100644 index d635d86f0d0..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/PLS-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/PLS-stamped.png b/doc/visual-programming/source/widgets/model/images/PLS-stamped.png deleted file mode 100644 index 9fe4e4c9308..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/PLS-stamped.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/model/images/RandomForest-classification.png b/doc/visual-programming/source/widgets/model/images/RandomForest-classification.png deleted file mode 100644 index 5d7da3880d7..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/RandomForest-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/RandomForest-regression.png b/doc/visual-programming/source/widgets/model/images/RandomForest-regression.png deleted file mode 100644 index 753b401cb28..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/RandomForest-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/RandomForest.png b/doc/visual-programming/source/widgets/model/images/RandomForest.png deleted file mode 100644 index 7b064925ff0..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/RandomForest.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/SVM-Predictions.png b/doc/visual-programming/source/widgets/model/images/SVM-Predictions.png deleted file mode 100644 index 1db70911253..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/SVM-Predictions.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/SVM-stamped.png b/doc/visual-programming/source/widgets/model/images/SVM-stamped.png deleted file mode 100644 index f49309388be..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/SVM-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/SVM-support-vectors.png b/doc/visual-programming/source/widgets/model/images/SVM-support-vectors.png deleted file mode 100644 index 4f5bd3b35a8..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/SVM-support-vectors.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/model/images/SaveModel-example.png b/doc/visual-programming/source/widgets/model/images/SaveModel-example.png deleted file mode 100644 index af16d9ef20f..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/SaveModel-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/SaveModel-save.png b/doc/visual-programming/source/widgets/model/images/SaveModel-save.png deleted file mode 100644 index 7d1232c56f6..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/SaveModel-save.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/SaveModel-stamped.png b/doc/visual-programming/source/widgets/model/images/SaveModel-stamped.png deleted file mode 100644 index e8455f36276..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/SaveModel-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/ScoringSheet-widget.png b/doc/visual-programming/source/widgets/model/images/ScoringSheet-widget.png deleted file mode 100644 index 439cae2681e..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/ScoringSheet-widget.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/ScoringSheet-workflow.png b/doc/visual-programming/source/widgets/model/images/ScoringSheet-workflow.png deleted file mode 100644 index c58265d57a7..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/ScoringSheet-workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/ScoringSheet-workflow2.png b/doc/visual-programming/source/widgets/model/images/ScoringSheet-workflow2.png deleted file mode 100644 index 32331184dc8..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/ScoringSheet-workflow2.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/model/images/Stacking-Example.png b/doc/visual-programming/source/widgets/model/images/Stacking-Example.png deleted file mode 100644 index bcd17922cf7..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Stacking-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Stacking-stamped.png b/doc/visual-programming/source/widgets/model/images/Stacking-stamped.png deleted file mode 100644 index 9d23ecc93c1..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Stacking-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-classification.png b/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-classification.png deleted file mode 100644 index 1a052bd1586..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-regression.png b/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-regression.png deleted file mode 100644 index f85648ad9ac..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-stamped.png b/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-stamped.png deleted file mode 100644 index ea6de4456dd..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/StochasticGradientDescent-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Tree-classification-model.png b/doc/visual-programming/source/widgets/model/images/Tree-classification-model.png deleted file mode 100644 index 
9974878da2b..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Tree-classification-model.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Tree-classification-visualize.png b/doc/visual-programming/source/widgets/model/images/Tree-classification-visualize.png deleted file mode 100644 index 946bcfe718b..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Tree-classification-visualize.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Tree-regression-subset.png b/doc/visual-programming/source/widgets/model/images/Tree-regression-subset.png deleted file mode 100644 index 1ad859bc27c..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Tree-regression-subset.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/Tree-stamped.png b/doc/visual-programming/source/widgets/model/images/Tree-stamped.png deleted file mode 100644 index 697da551ccb..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/Tree-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/kNN-regression.png b/doc/visual-programming/source/widgets/model/images/kNN-regression.png deleted file mode 100644 index 2f9ac1a85d9..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/kNN-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/images/kNN-stamped.png b/doc/visual-programming/source/widgets/model/images/kNN-stamped.png deleted file mode 100644 index 08a29d1f5b0..00000000000 Binary files a/doc/visual-programming/source/widgets/model/images/kNN-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/model/knn.md b/doc/visual-programming/source/widgets/model/knn.md deleted file mode 100644 index b894655acdb..00000000000 --- 
a/doc/visual-programming/source/widgets/model/knn.md +++ /dev/null @@ -1,55 +0,0 @@ -kNN -=== - -Predict according to the nearest training instances. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: kNN learning algorithm -- Model: trained model - -The **kNN** widget uses the [kNN algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) that searches for k closest training examples in feature space and uses their average as prediction. - -![](images/kNN-stamped.png) - -1. A name under which it will appear in other widgets. The default name is "kNN". -2. Set the number of nearest neighbors, the distance parameter (metric) and weights as model criteria. - - Metric can be: - - [Euclidean](https://en.wikipedia.org/wiki/Euclidean_distance) ("straight line", distance between two points) - - [Manhattan](https://en.wikipedia.org/wiki/Taxicab_geometry) (sum of absolute differences of all attributes) - - [Maximal](https://en.wikipedia.org/wiki/Chebyshev_distance) (greatest of absolute differences between attributes) - - [Mahalanobis](https://en.wikipedia.org/wiki/Mahalanobis_distance) (distance between point and distribution). - - The *Weights* you can use are: - - **Uniform**: all points in each neighborhood are weighted equally. - - **Distance**: closer neighbors of a query point have a greater influence than the neighbors further away. -3. Produce a report. -4. When you change one or more settings, you need to click *Apply*, which will put a new learner on the output and, if the training examples are given, construct a new model and output it as well. Changes can also be applied automatically by clicking the box on the left side of the *Apply* button. - -Preprocessing -------------- - -kNN uses default preprocessing when no other preprocessors are given. 
It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values -- normalizes the data by centering to mean and scaling to standard deviation of 1 - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Examples --------- - -The first example is a classification task on *iris* dataset. We compare the results of [k-Nearest neighbors](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) with the default model [Constant](../model/constant.md), which always predicts the majority class. - -![](images/Constant-classification.png) - -The second example is a regression task. This workflow shows how to use the *Learner* output. For the purpose of this example, we used the *housing* dataset. We input the **kNN** prediction model into [Predictions](../evaluate/predictions.md) and observe the predicted values. - -![](images/kNN-regression.png) diff --git a/doc/visual-programming/source/widgets/model/linearregression.md b/doc/visual-programming/source/widgets/model/linearregression.md deleted file mode 100644 index b77d4542d08..00000000000 --- a/doc/visual-programming/source/widgets/model/linearregression.md +++ /dev/null @@ -1,62 +0,0 @@ -Linear Regression -================= - -A linear regression algorithm with optional L1 (LASSO), L2 (ridge) or L1L2 (elastic net) regularization. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: linear regression learning algorithm -- Model: trained model -- Coefficients: linear regression coefficients - -The **Linear Regression** widget constructs a learner/predictor that learns a [linear function](https://en.wikipedia.org/wiki/Linear_regression) from its input data. The model can identify the relationship between a predictor xi and the response variable y. 
Additionally, [Lasso](https://en.wikipedia.org/wiki/Least_squares#Lasso_method) and [Ridge](https://en.wikipedia.org/wiki/Least_squares#Lasso_method) regularization parameters can be specified. Lasso regression minimizes a penalized version of the least squares loss function with L1-norm penalty and Ridge regularization with L2-norm penalty. - -Linear regression works only on regression tasks. - -![](images/Linear-Regression.png) - -1. The learner/predictor name -2. Parameters: Fit intercept. Unchecking the option forces the intercept to zero. -3. Choose a model to train: - - no regularization - - a [Ridge](https://en.wikipedia.org/wiki/Least_squares#Lasso_method) regularization (L2-norm penalty) - - a [Lasso](https://en.wikipedia.org/wiki/Least_squares#Lasso_method) bound (L1-norm penalty) - - an [Elastic net](https://en.wikipedia.org/wiki/Elastic_net_regularization) regularization - -Preprocessing -------------- - -Linear Regression uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Feature Scoring ---------------- - -Linear Regression can be used with Rank for feature scoring. See [Learners as Scorers](../../learners-as-scorers/index.md) for an example. - -Observing Coefficients ----------------------- - -To observe coefficients of linear regression, first build a model, then pass the model to the [Data Table](../data/datatable.md). This will automatically connect the *Coefficients* output to the Data Table, where you can sort the table by coefficients and observe which variables positively and negatively correlate with the prediction. 
- -![](images/Linear-Regression-workflow.png) -![](images/Linear-Regression-coefficients.png) - - -Example -------- - -Below is a simple workflow with the *housing* dataset. We trained **Linear Regression** and [Random Forest](../model/randomforest.md) and evaluated their performance in [Test & Score](../evaluate/testandscore.md). - -![](images/LinearRegression-regression.png) diff --git a/doc/visual-programming/source/widgets/model/loadmodel.md b/doc/visual-programming/source/widgets/model/loadmodel.md deleted file mode 100644 index 6506594e76c..00000000000 --- a/doc/visual-programming/source/widgets/model/loadmodel.md +++ /dev/null @@ -1,21 +0,0 @@ -Load Model -========== - -Load a model from an input file. - -**Outputs** - -- Model: trained model - -![](images/LoadModel-stamped.png) - -1. Choose from a list of previously used models. -2. Browse for saved models. -3. Reload the selected model. - -Example -------- - -When you want to use a custom-set model that you've saved before, open the **Load Model** widget and select the desired file with the *Browse* icon. This widget loads the existing model into the [Predictions](../evaluate/predictions.md) widget. Datasets used with **Load Model** have to contain compatible attributes! - -![](images/LoadModel-example.png) diff --git a/doc/visual-programming/source/widgets/model/logisticregression.md b/doc/visual-programming/source/widgets/model/logisticregression.md deleted file mode 100644 index b091bc2c420..00000000000 --- a/doc/visual-programming/source/widgets/model/logisticregression.md +++ /dev/null @@ -1,49 +0,0 @@ -Logistic Regression -=================== - -The logistic regression classification algorithm with LASSO (L1) or ridge (L2) regularization. 
- -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: logistic regression learning algorithm -- Model: trained model -- Coefficients: logistic regression coefficients - -**Logistic Regression** learns a [Logistic Regression](https://en.wikipedia.org/wiki/Logistic_regression) model from the data. It only works for classification tasks. - -![](images/LogisticRegression-stamped.png) - -1. A name under which the learner appears in other widgets. The default name is "Logistic Regression". -2. [Regularization](https://en.wikipedia.org/wiki/Regularization_(mathematics)) type (either [L1](https://en.wikipedia.org/wiki/Least_squares#Lasso_method) or [L2](https://en.wikipedia.org/wiki/Tikhonov_regularization)). Set the cost strength (default is C=1). -3. Press *Apply* to commit changes. If *Apply Automatically* is ticked, changes will be communicated automatically. - -Preprocessing -------------- - -Logistic Regression uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Feature Scoring ---------------- - -Logistic Regression can be used with Rank for feature scoring. See [Learners as Scorers](../../learners-as-scorers/index.md) for an example. - -Example -------- - -The widget is used just as any other widget for inducing a classifier. This is an example demonstrating prediction results with logistic regression on the *hayes-roth* dataset. We first load *hayes-roth_learn* in the [File](../data/file.md) widget and pass the data to **Logistic Regression**. Then we pass the trained model to [Predictions](../evaluate/predictions.md). 
- -Now we want to predict class value on a new dataset. We load *hayes-roth_test* in the second **File** widget and connect it to **Predictions**. We can now observe class values predicted with **Logistic Regression** directly in **Predictions**. - -![](images/LogisticRegression-classification.png) diff --git a/doc/visual-programming/source/widgets/model/naivebayes.md b/doc/visual-programming/source/widgets/model/naivebayes.md deleted file mode 100644 index b5f3fed48fe..00000000000 --- a/doc/visual-programming/source/widgets/model/naivebayes.md +++ /dev/null @@ -1,42 +0,0 @@ -Naive Bayes -=========== - -A fast and simple probabilistic classifier based on Bayes' theorem with the assumption of feature independence. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: naive bayes learning algorithm -- Model: trained model - -**Naive Bayes** learns a [Naive Bayesian](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) model from the data. It only works for classification tasks. - -![](images/NaiveBayes-stamped.png) - -This widget has two options: the name under which it will appear in other widgets and producing a report. The default name is *Naive Bayes*. When you change it, you need to press *Apply*. - -Preprocessing -------------- - -Naive Bayes uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes empty columns -- discretizes numeric values to 4 bins with equal frequency - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Examples --------- - -Here, we present two uses of this widget. First, we compare the results of the -**Naive Bayes** with another model, the [Random Forest](../model/randomforest.md). We connect *iris* data from [File](../data/file.md) to [Test & Score](../evaluate/testandscore.md). 
We also connect **Naive Bayes** and [Random Forest](../model/randomforest.md) to **Test & Score** and observe their prediction scores. - -![](images/NaiveBayes-classification.png) - -The second schema shows the quality of predictions made with **Naive Bayes**. We feed the [Test & Score](../evaluate/testandscore.md) widget a Naive Bayes learner and then send the data to the [Confusion Matrix](../evaluate/confusionmatrix.md). We also connect [Scatter Plot](../visualize/scatterplot.md) with **File**. Then we select the misclassified instances in the **Confusion Matrix** and feed them to [Scatter Plot](../visualize/scatterplot.md). The bold dots in the scatterplot are the misclassified instances from **Naive Bayes**. - -![](images/NaiveBayes-visualize.png) diff --git a/doc/visual-programming/source/widgets/model/neuralnetwork.md b/doc/visual-programming/source/widgets/model/neuralnetwork.md deleted file mode 100644 index c1fe9add64f..00000000000 --- a/doc/visual-programming/source/widgets/model/neuralnetwork.md +++ /dev/null @@ -1,61 +0,0 @@ -Neural Network -============== - -A multi-layer perceptron (MLP) algorithm with backpropagation. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: multi-layer perceptron learning algorithm -- Model: trained model - -The **Neural Network** widget uses sklearn's [Multi-layer Perceptron algorithm](http://scikit-learn.org/stable/modules/neural_networks_supervised.html) that can learn non-linear models as well as linear. - -![](images/NeuralNetwork-stamped.png) - -1. A name under which it will appear in other widgets. The default name is "Neural Network". -2. Set model parameters: - - Neurons per hidden layer: the ith element represents the number of neurons in the ith hidden layer. E.g. a neural network with 3 layers can be defined as 2, 3, 2. 
- - Activation function for the hidden layer: - - Identity: no-op activation, useful to implement linear bottleneck - - Logistic: the logistic sigmoid function - - tanh: the hyperbolic tan function - - ReLu: the rectified linear unit function - - Solver for weight optimization: - - L-BFGS-B: an optimizer in the family of quasi-Newton methods - - SGD: stochastic gradient descent - - Adam: stochastic gradient-based optimizer - - Alpha: L2 penalty (regularization term) parameter - - Max iterations: maximum number of iterations - - Other parameters are set to [sklearn's defaults](http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html). -3. Produce a report. -4. When the box is ticked (*Apply Automatically*), the widget will communicate changes automatically. Alternatively, click *Apply*. - -Preprocessing -------------- - -Neural Network uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values -- normalizes the data by centering to mean and scaling to standard deviation of 1 - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Examples --------- - -The first example is a classification task on *iris* dataset. We compare the results of **Neural Network** with the [Logistic Regression](../model/logisticregression.md). - -![](images/NN-Example-Test.png) - -The second example is a prediction task, still using the *iris* data. This workflow shows how to use the *Learner* output. We input the **Neural Network** prediction model into [Predictions](../evaluate/predictions.md) and observe the predicted values. 
- -![](images/NN-Example-Predict.png) diff --git a/doc/visual-programming/source/widgets/model/pls.md b/doc/visual-programming/source/widgets/model/pls.md deleted file mode 100644 index 1bc814a445c..00000000000 --- a/doc/visual-programming/source/widgets/model/pls.md +++ /dev/null @@ -1,34 +0,0 @@ -PLS -=== - -Partial Least Squares Regression widget for multivariate data analysis. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: PLS regression learning algorithm -- Model: trained model -- Coefficients: PLS regression coefficients - -**PLS** (Partial Least Squares) widget acts as a regressor for data with a numeric target variable. In its current implementation, it is the same as linear regression, but with a different kind of regularization. Here, regularization is performed with the choice of the components - the more components, the lesser the effect of regularization. - -PLS widget can output coefficients, just like [Linear Regression](../model/linearregression.md). One can observe the effect of each variable in a [Data Table](../data/datatable.md). - -![](images/PLS-stamped.png) - -1. The learner/predictor name -2. Parameters: - - Components: the number of components of the model, which act as regularization (the more components, the lesser the regularization) - - Iteration limit: maximum iterations for stopping the algorithm -3. Press *Apply* to commit changes. If *Apply Automatically* is ticked, changes are committed automatically. - -Example -------- - -Below is a simple workflow with the *housing* dataset. We trained **PLS** and [Linear Regression](../model/linearregression.md) and evaluated their performance in [Test & Score](../evaluate/testandscore.md). 
- -![](images/PLS-Example.png) diff --git a/doc/visual-programming/source/widgets/model/randomforest.md b/doc/visual-programming/source/widgets/model/randomforest.md deleted file mode 100644 index 58c5c1d954e..00000000000 --- a/doc/visual-programming/source/widgets/model/randomforest.md +++ /dev/null @@ -1,66 +0,0 @@ -Random Forest -============= - -Predict using an ensemble of decision trees. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: random forest learning algorithm -- Model: trained model - -[Random forest](https://en.wikipedia.org/wiki/Random_forest) is an ensemble learning method used for classification, regression and other tasks. It was first proposed by Tin Kam Ho and further developed by Leo Breiman (Breiman, 2001) and Adele Cutler. - -**Random Forest** builds a set of decision trees. Each tree is developed from a bootstrap sample from the training data. When developing individual trees, an arbitrary subset of attributes is drawn (hence the term "Random"), from which the best attribute for the split is selected. The final model is based on the majority vote from individually developed trees in the forest. - -**Random Forest** works for both classification and regression tasks. - -![](images/RandomForest.png) - -1. Specify the name of the model. The default name is "Random Forest". -2. Basic properties: - - *Number of trees*: Specify how many decision trees will be included in the forest. - - *Number of attributes considered at each split*: Specify how many attributes will be arbitrarily drawn for consideration at each node. If the latter is not specified (option *Number of attributes...* left unchecked), this number is equal to the square root of the number of attributes in the data. - - *Replicable training*: Fix the seed for tree generation, which enables replicability of the results. 
- - *Balance class distribution*: [Weigh classes](https://scikit-learn.org/stable/modules/generated/sklearn.utils.class_weight.compute_class_weight.html?highlight=sklearn%20utils%20class_weight) inversely proportional to their frequencies. -3. Growth control: - - *Limit depth of individual trees*: Original Breiman's proposal is to grow the trees without any pre-pruning, but since pre-pruning often works quite well and is faster, the user can set the depth to which the trees will be grown. - - *Do not split subsets smaller than*: Select the smallest subset that can be split. -4. Click *Apply* to communicate the changes to other widgets. Alternatively, tick the box on the left side of the *Apply* button and changes will be communicated automatically. - -Preprocessing -------------- - -Random Forest uses default preprocessing when no other preprocessors are given. It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Feature Scoring ---------------- - -Random Forest can be used with Rank for feature scoring. See [Learners as Scorers](../../learners-as-scorers/index.md) for an example. - -Examples --------- - -For classification tasks, we use *iris* dataset. Connect it to [Predictions](../evaluate/predictions.md). Then, connect [File](../data/file.md) to **Random Forest** and [Tree](../model/tree.md) and connect them further to [Predictions](../evaluate/predictions.md). Finally, observe the predictions for the two models. - -![](images/RandomForest-classification.png) - -For regressions tasks, we will use *housing* data. 
Here, we will compare different models, namely **Random Forest**, [Linear Regression](../model/linearregression.md) and [Constant](../model/constant.md), in the [Test & Score](../evaluate/testandscore.md) widget. - -![](images/RandomForest-regression.png) - -References ----------- - -Breiman, L. (2001). Random Forests. In Machine Learning, 45(1), 5-32. Available [here](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf). diff --git a/doc/visual-programming/source/widgets/model/savemodel.md b/doc/visual-programming/source/widgets/model/savemodel.md deleted file mode 100644 index 8d75d081ee2..00000000000 --- a/doc/visual-programming/source/widgets/model/savemodel.md +++ /dev/null @@ -1,24 +0,0 @@ -Save Model -========== - -Save a trained model to an output file. - -If the file is saved to the same directory as the workflow or in the subtree of that directory, the widget remembers the relative path. Otherwise it will store an absolute path, but disable auto save for security reasons. - -**Inputs** - -- Model: trained model - -![](images/SaveModel-stamped.png) - -1. Choose from previously saved models. -2. Save the created model with the *Browse* icon. Click on the icon and enter the name of the file. The model will be saved to a pickled file. -![](images/SaveModel-save.png) -3. Save the model. - -Example -------- - -When you want to save a custom-set model, feed the data to the model (e.g. [Logistic Regression](../model/logisticregression.md)) and connect it to **Save Model**. Name the model; load it later into workflows with [Load Model](../model/loadmodel.md). Datasets used with **Load Model** have to contain compatible attributes. 
- -![](images/SaveModel-example.png) diff --git a/doc/visual-programming/source/widgets/model/scoringsheet.md b/doc/visual-programming/source/widgets/model/scoringsheet.md deleted file mode 100644 index 2d8951270c1..00000000000 --- a/doc/visual-programming/source/widgets/model/scoringsheet.md +++ /dev/null @@ -1,39 +0,0 @@ -Scoring Sheet -================ -A classification model for explainable predictions. - -**Inputs** - -- Data: dataset used to train the model -- Preprocessor: preprocessing methods - -**Outputs** - -- Learner: scoring sheet ([fasterrisk](https://github.com/jiachangliu/FasterRisk)) learning algorithm -- Model: a trained scoring sheet model - -**Scoring Sheet** widget offers a machine learning model, which can be easily interpreted using the `Scoring Sheet Viewer` widget. The backbone of the widget is the fasterrisk algorithm, for more information you can read the paper. - -![](images/ScoringSheet-widget.png) - -The Scoring Sheet widget has four different parameters which we can tune to suit our needs: - -- Number of Attributes After Feature Selection - This widget requires all features to be binary, resulting in a preprocessing pipeline that discretizes continuous features and one-hot encodes categorical ones. This parameter helps to manage (reduce) the potentially large number of resulting features and ensures a faster learning process by selecting only the best ones for model training. - -- Maximum Number of Decision Parameters - Limits the number of decision parameters in the model, balancing complexity and explainability. More parameters can increase accuracy but make the model harder to explain. - -- Maximum Points per Decision Parameter - Controls the range of points each decision parameter can contribute. A wider range can increase model complexity and accuracy but may reduce explainability. - -- Number of Input Features Used - Specifies how many original features (before binarization) the decision parameters can originate from. 
This is useful for ensuring each parameter originates from a unique feature or when only a subset of features is desired. - - -Example -------- - -![](images/ScoringSheet-workflow.png) - -The workflow above shows the most straightforward way of using the Scoring Sheet widget. After training the Scoring Sheet model using our dataset, we input it into the Scoring Sheet Viewer widget, which presents us with a scoring sheet. - -![](images/ScoringSheet-workflow2.png) - -The second way of using the Scoring Sheet widget is to use it as any other classification model. In this case, we can use the Test & Score widget to evaluate the model's performance. In the evaluation results, we can see the model's performance for its predictions. \ No newline at end of file diff --git a/doc/visual-programming/source/widgets/model/stacking.md b/doc/visual-programming/source/widgets/model/stacking.md deleted file mode 100644 index c3e4876f3f2..00000000000 --- a/doc/visual-programming/source/widgets/model/stacking.md +++ /dev/null @@ -1,33 +0,0 @@ -Stacking -======== - -Stack multiple models. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) -- Learners: learning algorithm -- Aggregate: model aggregation method - -**Outputs** - -- Learner: aggregated (stacked) learning algorithm -- Model: trained model - -**Stacking** is an ensemble method that computes a meta model from several base models. The **Stacking** widget has the **Aggregate** input, which provides a method for aggregating the input models. If no aggregation input is given the default methods are used. Those are **Logistic Regression** for classification and **Ridge Regression** for regression problems. - -![](images/Stacking-stamped.png) - -1. The meta learner can be given a name under which it will appear in other widgets. The default name is “Stack”. -2. Click *Apply* to commit the aggregated model. 
That will put the new learner in the output and, if the training examples are given, construct a new model and output it as well. To communicate changes automatically, tick *Apply Automatically*. -3. Access help and produce a report. - -Example -------- - -We will use [Paint Data](../data/paintdata.md) to demonstrate how the widget is used. We painted a complex dataset with 4 class labels and sent it to [Test & Score](../evaluate/testandscore.md). We also provided three [kNN](../model/knn.md) learners, each with different parameters (number of neighbors is 5, 10 or 15). Evaluation results are good, but can we do better? - -Let's use **Stacking**. **Stacking** requires several learners on the input and an aggregation method. In our case, this is [Logistic Regression](../model/logisticregression.md). A constructed meta learner is then sent to **Test & Score**. Results have improved, even if only marginally. **Stacking** normally works well on complex data sets. - -![](images/Stacking-Example.png) diff --git a/doc/visual-programming/source/widgets/model/stochasticgradient.md b/doc/visual-programming/source/widgets/model/stochasticgradient.md deleted file mode 100644 index 1011b3cee67..00000000000 --- a/doc/visual-programming/source/widgets/model/stochasticgradient.md +++ /dev/null @@ -1,84 +0,0 @@ -Stochastic Gradient Descent -=========================== - -Minimize an objective function using a stochastic approximation of gradient descent. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: stochastic gradient descent learning algorithm -- Model: trained model - -The **Stochastic Gradient Descent** widget uses [stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent) that minimizes a chosen loss function with a linear function. 
The algorithm approximates a true gradient by considering one sample at a time, and simultaneously updates the model based on the gradient of the loss function. For regression, it returns predictors as minimizers of the sum, i.e. M-estimators, and is especially useful for large-scale and sparse datasets. - -![](images/StochasticGradientDescent-stamped.png) - -1. Specify the name of the model. The default name is "SGD". -2. Algorithm parameters: - - Classification loss function: - - [Hinge](https://en.wikipedia.org/wiki/Hinge_loss) (linear SVM) - - [Logistic Regression](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html#sklearn.linear_model.LogisticRegression) (logistic regression SGD) - - [Modified Huber](https://en.wikipedia.org/wiki/Huber_loss) (smooth loss that brings tolerance to outliers as well as probability estimates) - - *Squared Hinge* (quadratically penalized hinge) - - [Perceptron](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html#sklearn.linear_model.Perceptron) (linear loss used by the perceptron algorithm) - - [Squared Loss](https://en.wikipedia.org/wiki/Mean_squared_error#Regression) (fitted to ordinary least-squares) - - [Huber](https://en.wikipedia.org/wiki/Huber_loss) (switches to linear loss beyond ε) - - [Epsilon insensitive](http://kernelsvm.tripod.com/) (ignores errors within ε, linear beyond it) - - *Squared epsilon insensitive* (loss is squared beyond ε-region). - - Regression loss function: - - [Squared Loss](https://en.wikipedia.org/wiki/Mean_squared_error#Regression) (fitted to ordinary least-squares) - - [Huber](https://en.wikipedia.org/wiki/Huber_loss) (switches to linear loss beyond ε) - - [Epsilon insensitive](http://kernelsvm.tripod.com/) (ignores errors within ε, linear beyond it) - - *Squared epsilon insensitive* (loss is squared beyond ε-region). -3. Regularization norms to prevent overfitting: - - None. 
- [Lasso (L1)](https://en.wikipedia.org/wiki/Taxicab_geometry) (L1 leading to sparse solutions) - - [Ridge (L2)](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm) (L2, standard regularizer) - - [Elastic net](https://en.wikipedia.org/wiki/Elastic_net_regularization) (mixing both penalty norms). - - Regularization strength defines how much regularization will be applied (the less we regularize, the more we allow the model to fit the data) and the mixing parameter what the ratio between L1 and L2 loss will be (if set to 0 then the loss is L2, if set to 1 then it is L1). -4. Learning parameters. - - Learning rate: - - *Constant*: learning rate stays the same through all epochs (passes) - - [Optimal](http://leon.bottou.org/projects/sgd): a heuristic proposed by Leon Bottou - - [Inverse scaling](http://users.ics.aalto.fi/jhollmen/dippa/node22.html): learning rate is inversely related to the number of iterations - - Initial learning rate. - - Inverse scaling exponent: learning rate decay. - - Number of iterations: the number of passes through the training data. - - If *Shuffle data after each iteration* is on, the order of data instances is mixed after each pass. - - If *Fixed seed for random shuffling* is on, the algorithm will use a fixed random seed and enable replicating the results. -5. Produce a report. -6. Press *Apply* to commit changes. Alternatively, tick the box on the left side of the *Apply* button and changes will be communicated automatically. - -Preprocessing -------------- - -SGD uses default preprocessing when no other preprocessors are given. 
It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values -- normalizes the data by centering to mean and scaling to standard deviation of 1 - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Feature Scoring ---------------- - -Stochastic Gradient Descent can be used with Rank for feature scoring. See [Learners as Scorers](../../learners-as-scorers/index.md) for an example. - -Examples --------- - -For the classification task, we will use *iris* dataset and test two models on it. We connected [Stochastic Gradient Descent](../model/stochasticgradient.md) and [Tree](../model/tree.md) to [Test & Score](../evaluate/testandscore.md). We also connected [File](../data/file.md) to **Test & Score** and observed model performance in the widget. - -![](images/StochasticGradientDescent-classification.png) - -For the regression task, we will compare three different models to see which predict what kind of results. For the purpose of this example, the *housing* dataset is used. We connect the [File](../data/file.md) widget to **Stochastic Gradient Descent**, [Linear Regression](../model/linearregression.md) and [kNN](../model/knn.md) widget and all four to the [Predictions](../evaluate/predictions.md) widget. - -![](images/StochasticGradientDescent-regression.png) diff --git a/doc/visual-programming/source/widgets/model/svm.md b/doc/visual-programming/source/widgets/model/svm.md deleted file mode 100644 index d4580763007..00000000000 --- a/doc/visual-programming/source/widgets/model/svm.md +++ /dev/null @@ -1,69 +0,0 @@ -SVM -=== - -Support Vector Machines map inputs to higher-dimensional feature spaces. 
- -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: support vector machine learning algorithm -- Model: trained model -- Support Vectors: instances used as support vectors - -[Support vector machine](https://en.wikipedia.org/wiki/Support_vector_machine) (SVM) is a machine learning technique that separates the attribute space with a hyperplane, thus maximizing the margin between the instances of different classes or class values. The technique often yields supreme predictive performance results. Orange embeds a popular implementation of SVM from the [LIBSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) package. This widget is its graphical user interface. - -For regression tasks, **SVM** performs linear regression in a high-dimensional feature space using an ε-insensitive loss. Its estimation accuracy depends on a good setting of C, ε and kernel parameters. The widget outputs class predictions based on a [SVM Regression](https://en.wikipedia.org/wiki/Support_vector_machine#Regression). - -The widget works for both classification and regression tasks. - -![](images/SVM-stamped.png) - -1. The learner can be given a name under which it will appear in other widgets. The default name is "SVM". -2. SVM type with test error settings. *SVM* and *ν-SVM* are based on different minimization of the error function. On the right side, you can set test error bounds: - - [SVM](http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html): - - [Cost](http://www.quora.com/What-are-C-and-gamma-with-regards-to-a-support-vector-machine): penalty term for loss and applies for classification and regression tasks. - - ε: a parameter to the epsilon-SVR model, applies to regression tasks. Defines the distance from true values within which no penalty is associated with predicted values. 
- - [ν-SVM](http://scikit-learn.org/stable/modules/generated/sklearn.svm.NuSVR.html#sklearn.svm.NuSVR): - - [Cost](http://www.quora.com/What-are-C-and-gamma-with-regards-to-a-support-vector-machine): penalty term for loss and applies only to regression tasks - - ν: a parameter to the ν-SVR model, applies to classification and regression tasks. An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. -3. Kernel is a function that transforms attribute space to a new feature space to fit the maximum-margin hyperplane, thus allowing the algorithm to create the model with [Linear](https://en.wikipedia.org/wiki/Linear_model), [Polynomial](https://en.wikipedia.org/wiki/Polynomial_kernel), [RBF](https://en.wikipedia.org/wiki/Radial_basis_function_kernel) and [Sigmoid](http://crsouza.com/2010/03/kernel-functions-for-machine-learning-applications/#sigmoid) kernels. Functions that specify the kernel are presented upon selecting them, and the constants involved are: - - **g** for the gamma constant in kernel function (the recommended value is 1/k, where k is the number of the attributes, but since there may be no training set given to the widget the default is 0 and the user has to set this option manually), - - **c** for the constant c0 in the kernel function (default 0), and - - **d** for the degree of the kernel (default 3). -4. Set permitted deviation from the expected value in *Numerical Tolerance*. Tick the box next to *Iteration Limit* to set the maximum number of iterations permitted. -5. Produce a report. -6. Click *Apply* to commit changes. If you tick the box on the left side of the *Apply* button, changes will be communicated automatically. - -Preprocessing -------------- - -SVM uses default preprocessing when no other preprocessors are given. 
It executes them in the following order: - -- removes instances with unknown target values -- continuizes categorical variables (with one-hot-encoding) -- removes empty columns -- imputes missing values with mean values - -For classification, SVM also normalizes dense and scales sparse data. - -To remove default preprocessing, connect an empty [Preprocess](../data/preprocess.md) widget to the learner. - -Examples --------- - -In the first (regression) example, we have used *housing* dataset and split the data into two data subsets (*Data Sample* and *Remaining Data*) with [Data Sampler](../data/datasampler.md). The sample was sent to SVM which produced a *Model*, which was then used in [Predictions](../evaluate/predictions.md) to predict the values in *Remaining Data*. A similar schema can be used if the data is already in two separate files; in this case, two [File](../data/file.md) widgets would be used instead of the [File](../data/file.md) - [Data Sampler](../data/datasampler.md) combination. - -![](images/SVM-Predictions.png) - -The second example shows how to use **SVM** in combination with [Scatter Plot](../visualize/scatterplot.md). The following workflow trains a SVM model on *iris* data and outputs support vectors, which are those data instances that were used as support vectors in the learning phase. We can observe which are these data instances in a scatter plot visualization. Note that for the workflow to work correctly, you must set the links between widgets as demonstrated in the screenshot below. - -![](images/SVM-support-vectors.png) - -References ----------- - -[Introduction to SVM on StatSoft](http://www.statsoft.com/Textbook/Support-Vector-Machines). 
diff --git a/doc/visual-programming/source/widgets/model/tree.md b/doc/visual-programming/source/widgets/model/tree.md deleted file mode 100644 index 910246ae9e5..00000000000 --- a/doc/visual-programming/source/widgets/model/tree.md +++ /dev/null @@ -1,49 +0,0 @@ -Tree -==== - -A tree algorithm with forward pruning. - -**Inputs** - -- Data: input dataset -- Preprocessor: preprocessing method(s) - -**Outputs** - -- Learner: decision tree learning algorithm -- Model: trained model - -**Tree** is a simple algorithm that splits the data into nodes by class purity (information gain for categorical and MSE for numeric target variable). It is a precursor to [Random Forest](../model/randomforest.md). Tree in Orange is designed in-house and can handle both categorical and numeric datasets. - -It can also be used for both classification and regression tasks. - -![](images/Tree-stamped.png) - -1. The learner can be given a name under which it will appear in other widgets. The default name is "Tree". -2. Tree parameters: - - **Induce binary tree**: build a binary tree (split into two child nodes) - - **Min. number of instances in leaves**: if checked, the algorithm will never construct a split which would put less than the specified number of training examples into any of the branches. - - **Do not split subsets smaller than**: forbids the algorithm to split the nodes with less than the given number of instances. - - **Limit the maximal tree depth**: limits the depth of the classification tree to the specified number of node levels. -3. **Stop when majority reaches [%]**: stop splitting the nodes after a specified majority threshold is reached -4. Produce a report. After changing the settings, you need to click *Apply*, which will put the new learner on the output and, if the training examples are given, construct a new classifier and output it as well. Alternatively, tick the box on the left and changes will be communicated automatically. 
- -Preprocessing -------------- - -Tree does not use any preprocessing. - -Examples --------- - -There are two typical uses for this widget. First, you may want to induce a model and check what it looks like in [Tree Viewer](../visualize/treeviewer.md). - -![](images/Tree-classification-visualize.png) - -The second schema trains a model and evaluates its performance against [Logistic Regression](../model/logisticregression.md). - -![](images/Tree-classification-model.png) - -We used the *iris* dataset in both examples. However, **Tree** works for regression tasks as well. Use *housing* dataset and pass it to **Tree**. The selected tree node from [Tree Viewer](../visualize/treeviewer.md) is presented in the [Scatter Plot](../visualize/scatterplot.md) and we can see that the selected examples exhibit the same features. - -![](images/Tree-regression-subset.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/DBSCAN.md b/doc/visual-programming/source/widgets/unsupervised/DBSCAN.md deleted file mode 100644 index b3cb55d692e..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/DBSCAN.md +++ /dev/null @@ -1,34 +0,0 @@ -DBSCAN -====== - -Groups items using the DBSCAN clustering algorithm. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with cluster label as a meta attribute - -The widget applies the [DBSCAN clustering](https://en.wikipedia.org/wiki/DBSCAN) algorithm to the data and outputs a new dataset with cluster labels as a meta attribute. The widget also shows the sorted graph with distances to k-th nearest neighbors. With k values set to **Core point neighbors** as suggested in the [methods article](https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf). This gives the user the idea of an ideal selection for **Neighborhood distance** setting. As suggested by the authors, this parameter should be set to the first value in the first "valley" in the graph. - -![](images/DBSCAN.png) - -1. 
**Parameters**: - - *Core point neighbors*: The number of neighbors for a point to be considered as a core point. - - *Neighborhood distance*: The maximum distance between two samples for one to be considered as in the neighborhood of the other. -2. Distance metric used in grouping the items (Euclidean, Manhattan, or Cosine). If *Normalize features* is selected, the data will be standardized column-wise (centered to mean and scaled to standard deviation of 1). -3. If *Apply Automatically* is ticked, the widget will commit changes -automatically. Alternatively, click *Apply*. - -The graph shows the distance to the k-th nearest neighbor. *k* is -set by the **Core point neighbors** option. By moving the black slider -left and right you can select the right **Neighborhood distance**. - -Example -------- - -In the following example, we connected the [File](../data/file.md) widget with the Iris dataset to the DBSCAN widget. In the DBSCAN widget, we set the **Core point neighbors** parameter to 5 and set the **Neighborhood distance** to the value in the first "valley" in the graph. We show clusters in the [Scatter Plot](../visualize/scatterplot.md) widget. - -![](images/DBSCAN-Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/PCA.md b/doc/visual-programming/source/widgets/unsupervised/PCA.md deleted file mode 100644 index f4b3fdd5416..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/PCA.md +++ /dev/null @@ -1,46 +0,0 @@ -PCA -=== - -PCA linear transformation of input data. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Transformed Data: PCA transformed data -- Components: [Eigenvectors](https://en.wikipedia.org/wiki/Eigenvalues_and_eigenvectors). - -[Principal Component Analysis](https://en.wikipedia.org/wiki/Principal_component_analysis) (PCA) computes the PCA linear transformation of the input data. It outputs either a transformed dataset with weights of individual instances or weights of principal components. 
- -![](images/PCA-stamped.png) - -1. Select how many principal components you wish in your output. It is best to choose as few as possible with variance covered as high as possible. You can also set how much variance you wish to cover with your principal components. -2. You can normalize data to adjust the values to common scale. If checked, columns are divided by their standard deviations. -3. When *Apply Automatically* is ticked, the widget will automatically communicate all changes. Alternatively, click *Apply*. -4. Press *Save Image* if you want to save the created image to your computer. -5. Produce a report. -6. Principal components graph, where the red (lower) line is the variance covered per component and the green (upper) line is cumulative variance covered by components. - -The number of components of the transformation can be selected either in the *Components Selection* input box or by dragging the vertical cutoff line in the graph. - -Preprocessing -------------- - -The widget preprocesses the input data in the following order: - -- continuizes categorical variables (with one-hot-encoding) -- imputes missing values with mean values -- if *Normalize variables* is checked, it divides columns by their standard deviation. - -Examples --------- - -**PCA** can be used to simplify visualizations of large datasets. Below, we used the *Iris* dataset to show how we can improve the visualization of the dataset with PCA. The transformed data in the [Scatter Plot](../visualize/scatterplot.md) show a much clearer distinction between classes than the default settings. - -![](images/PCAExample.png) - -The widget provides two outputs: transformed data and principal components. Transformed data are weights for individual instances in the new coordinate system, while components are the system descriptors (weights for principal components). When fed into the [Data Table](../data/datatable.md), we can see both outputs in numerical form. 
We used two data tables in order to provide a cleaner visualization of the workflow, but you can also choose to edit the links in such a way that you display the data in just one data table. You only need to create two links and connect the *Transformed data* and *Components* outputs to the *Data* input. - -![](images/PCAExample2.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/correspondenceanalysis.md b/doc/visual-programming/source/widgets/unsupervised/correspondenceanalysis.md deleted file mode 100644 index ced266e4d3f..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/correspondenceanalysis.md +++ /dev/null @@ -1,28 +0,0 @@ -Correspondence Analysis -======================= - -Correspondence analysis for categorical multivariate data. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Coordinates: coordinates of all components - -[Correspondence Analysis](https://en.wikipedia.org/wiki/Correspondence_analysis) (CA) computes the CA linear transformation of the input data. While it is similar to PCA, CA computes linear transformation on discrete rather than on continuous data. - -![](images/CorrespondenceAnalysis-stamped.png) - -1. Select the variables you want to see plotted. -2. Select the component for each axis. -3. [Inertia](https://en.wikipedia.org/wiki/Sylvester%27s_law_of_inertia) values (percentage of independence from transformation, i.e. variables are in the same dimension). -4. Produce a report. - -Example -------- - -Below is a simple comparison between the **Correspondence Analysis** and [Scatter Plot](../visualize/scatterplot.md) widgets on the *Titanic* dataset. While the [Scatter Plot](../visualize/scatterplot.md) shows fairly well which class and sex had a good survival rate and which one didn't, **Correspondence Analysis** can plot several variables in a 2-D graph, thus making it easy to see the relations between variable values. 
It is clear from the graph that "no", "male" and "crew" are related to each other. The same goes for "yes", "female" and "first". - -![](images/CorrespondenceAnalysis-Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/distancefile.md b/doc/visual-programming/source/widgets/unsupervised/distancefile.md deleted file mode 100644 index bbb698910f0..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/distancefile.md +++ /dev/null @@ -1,27 +0,0 @@ -Distance File -============= - -Loads an existing distance file. - -**Outputs** - -- Distance File: distance matrix - -![](images/DistanceFile-stamped.png) - -1. Choose from a list of previously saved distance files. -2. Browse for saved distance files. -3. Reload the selected distance file. -4. Information about the distance file (number of points, - labelled/unlabelled). -5. Browse documentation datasets. -6. Produce a report. - -The simplest way to prepare a distance file is to use Excel. The widget currently processes only single-sheet workbooks. The matrix can be either rectangular, or upper- or lower-triangular, with labels given for columns (immediately above) or rows (immediately to the left) or both. Empty cells are treated as zeros. If the matrix is triangular and only one set of labels is given or both sets are equal, the other half can be filled automatically, making the matrix symmetric. - -Example -------- - -When you want to use a custom-set distance file that you've saved before, open the **Distance File** widget and select the desired file with the *Browse* icon. This widget loads the existing distance file. In the snapshot below, we loaded the transformed *Iris* distance matrix from the [Save Distance Matrix](../unsupervised/savedistancematrix.md) example. We displayed the transformed data matrix in the [Distance Map](../unsupervised/distancemap.md) widget. We also decided to display a distance map of the original *Iris* dataset for comparison. 
- -![](images/DistanceFile-Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/distancemap.md b/doc/visual-programming/source/widgets/unsupervised/distancemap.md deleted file mode 100644 index 61a3766760f..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/distancemap.md +++ /dev/null @@ -1,52 +0,0 @@ -Distance Map -============ - -Visualizes distances between items. - -**Inputs** - -- Distances: distance matrix - -**Outputs** - -- Data: instances selected from the matrix -- Features: attributes selected from the matrix - -The **Distance Map** visualizes distances between objects. The visualization is the same as if we printed out a table of numbers, except that the numbers are replaced by colored spots. - -Distances are most often those between instances ("*rows*" in the [Distances](../unsupervised/distances.md) widget) or attributes ("*columns*" in Distances widget). The only suitable input for **Distance Map** is the [Distances](../unsupervised/distances.md) widget. For the output, the user can select a region of the map and the widget will output the corresponding instances or attributes. Also note that the **Distances** widget ignores discrete values and calculates distances only for continuous data, thus it can only display distance map for discrete data if you [Continuize](../data/continuize.md) them first. - -The snapshot shows distances between columns in the *heart disease* data, where smaller distances are represented with light and larger with dark orange. The matrix is symmetric and the diagonal is a light shade of orange - no attribute is different from itself. Symmetricity is always assumed, while the diagonal may also be non-zero. - -![](images/DistanceMap-stamped.png) - -1. 
*Element sorting* arranges elements in the map by - - None (lists instances as found in the dataset) - - **Clustering** (clusters data by similarity) - - **Clustering with ordered leaves** (maximizes the sum of similarities of adjacent elements) -2. *Colors* - - **Colors** (select the color palette for your distance map) - - **Low** and **High** are thresholds for the color palette (low for instances or attributes with low distances and high for instances or attributes with high distances). -3. Select *Annotations*. -4. If *Send Selected Automatically* is on, the data subset is communicated automatically, otherwise you need to press *Send Selected*. -5. Press *Save Image* if you want to save the created image to your computer. -6. Produce a report. - -Normally, a color palette is used to visualize the entire range of distances appearing in the matrix. This can be changed by setting the low and high threshold. In this way we ignore the differences in distances outside this interval and visualize the interesting part of the distribution. - -Below, we visualized the most correlated attributes (distances by columns) in the *heart disease* dataset by setting the color threshold for high distances to the minimum. We get a predominantly black square, where attributes with the lowest distance scores are represented by a lighter shade of the selected color schema (in our case: orange). Beside the diagonal line, we see that in our example *ST by exercise* and *major vessels colored* are the two attributes closest together. - -![](images/DistanceMap-Highlighted.png) - -The user can select a region in the map with the usual click-and-drag of the cursor. When a part of the map is selected, the widget outputs all items from the selected cells. - -Examples --------- - -The first workflow shows a very standard use of the **Distance Map** widget. We select 70% of the original *Iris* data as our sample and view the distances between rows in **Distance Map**. 
- -![](images/DistanceMap-Example1.png) - -In the second example, we use the *heart disease* data again and select a subset of women only from the [Scatter Plot](../visualize/scatterplot.md). Then, we visualize distances between columns in the **Distance Map**. Since the subset also contains some discrete data, the [Distances](../unsupervised/distances.md) widget warns us it will ignore the discrete features, thus we will see only continuous instances/attributes in the map. - -![](images/DistanceMap-Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/distancematrix.md b/doc/visual-programming/source/widgets/unsupervised/distancematrix.md deleted file mode 100644 index 8777fbfa69e..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/distancematrix.md +++ /dev/null @@ -1,31 +0,0 @@ -Distance Matrix -=============== - -Visualizes distance measures in a distance matrix. - -**Inputs** - -- Distances: distance matrix - -**Outputs** - -- Distances: distance matrix -- Table: distance measures in a distance matrix - -The **Distance Matrix** widget creates a distance matrix, which is a two-dimensional array containing the distances, taken pairwise, between the elements of a set. The number of elements in the dataset defines the size of the matrix. Distance matrices are essential for hierarchical clustering and they are extremely useful in bioinformatics as well, where they are used to represent protein structures in a coordinate-independent manner. - -![](images/DistanceMatrix-stamped.png) - -1. Elements in the dataset and the distances between them. -2. Label the table. The options are: *none*, *enumeration*, *according to variables*. -3. Produce a report. -4. Click *Send* to communicate changes to other widgets. Alternatively, tick the box in front of the *Send* button and changes will be communicated automatically (*Send Automatically*). 
- -The only two suitable inputs for **Distance Matrix** are the [Distances](../unsupervised/distances.md) widget and the [Distance Transformation](../unsupervised/distancetransformation.md) widget. The output of the widget is a data table containing the distance matrix. The user can decide how to label the table and the distance matrix (or instances in the distance matrix) can then be visualized or displayed in a separate data table. - -Example -------- - -The example below displays a very standard use of the **Distance Matrix** widget. We compute the distances between rows in the sample from the *Iris* dataset and output them in the **Distance Matrix**. It comes as no surprise that Iris Virginica and Iris Setosa are the furthest apart. - -![](images/DistanceMatrix-Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/distances.md b/doc/visual-programming/source/widgets/unsupervised/distances.md deleted file mode 100644 index 613e0827ac6..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/distances.md +++ /dev/null @@ -1,55 +0,0 @@ -Distances -========= - -Computes distances between rows/columns in a dataset. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Distances: distance matrix - -The **Distances** widget computes distances between rows or columns in a dataset. By default, the data will be normalized to ensure equal treatment of individual features. Normalization is always done column-wise. - -Sparse data can only be used with Euclidean, Manhattan and Cosine metric. - -The resulting distance matrix can be fed further to [Hierarchical Clustering](hierarchicalclustering.md) for uncovering groups in the data, to [Distance Map](distancemap.md) or [Distance Matrix](distancematrix.md) for visualizing the distances (Distance Matrix can be quite slow for larger data sets), to [MDS](mds.md) for mapping the data instances using the distance matrix and finally, saved with [Save Distance Matrix](savedistancematrix.md). 
Distance file can be loaded with [Distance File](distancefile.md). - -Distances work well with Orange add-ons, too. The distance matrix can be fed to Network from Distances (Network add-on) to convert the matrix into a graph and to Duplicate Detection (Text add-on) to find duplicate documents in the corpus. - -![](images/Distances-stamped.png) - -1. Choose whether to measure distances between rows or columns. -2. Choose the *Distance Metric*: - - [Euclidean](https://en.wikipedia.org/wiki/Euclidean_distance) ("straight line", distance between two points) - - [Manhattan](https://en.wiktionary.org/wiki/Manhattan_distance) (the sum of absolute differences for all attributes) - - [Cosine](https://en.wikipedia.org/wiki/Cosine_similarity) (the cosine of the angle between two vectors of an inner product space). Orange computes the cosine distance, which is 1-similarity. - - [Jaccard](https://en.wikipedia.org/wiki/Jaccard_index) (the size of the intersection divided by the size of the union of the sample sets) - - [Spearman](https://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient)(linear correlation between the rank of the values, remapped as a distance in a [0, 1] interval) - - [Spearman absolute](https://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient)(linear correlation between the rank of the absolute values, remapped as a distance in a [0, 1] interval) - - [Pearson](https://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient) (linear correlation between the values, remapped as a distance in a [0, 1] interval) - - [Pearson absolute](https://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient) (linear correlation between the absolute values, remapped as a distance in a [0, 1] interval) - - [Hamming](https://en.wikipedia.org/wiki/Hamming_distance) (the number of features at which the corresponding values are different) - - [Bhattacharyya distance](https://en.wikipedia.org/wiki/Bhattacharyya_distance) 
(Similarity between two probability distributions, not a real distance as it doesn't obey triangle inequality.) - - Normalize the features. Normalization is always done column-wise. Values are zero centered and scaled. - In case of missing values, the widget automatically imputes the average value of the row or the column. - The widget works for both numeric and categorical data. In case of categorical data, the distance is 0 if the two values are the same ('green' and 'green') and 1 if they are not ('green' and 'blue'). -3. Tick *Apply Automatically* to automatically commit changes to other widgets. Alternatively, press '*Apply*'. - -Examples --------- - -The first example shows a typical use of the **Distances** widget. We are using the *iris.tab* data from the [File](../data/file.md) widget. We compute distances between data instances (rows) and pass the result to the [Hierarchical Clustering](hierarchicalclustering.md). This is a simple workflow to find groups of data instances. - -![](images/Distances-Example1-rows.png) - -Alternatively, we can compute distance between columns and find how similar our features are. - -![](images/Distances-Example1-columns.png) - -The second example shows how to visualize the resulting distance matrix. A nice way to observe data similarity is in a [Distance Map](distancemap.md) or in [MDS](mds.md). - -![](images/Distances-Example2.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/distancetransformation.md b/doc/visual-programming/source/widgets/unsupervised/distancetransformation.md deleted file mode 100644 index e86d3839f67..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/distancetransformation.md +++ /dev/null @@ -1,37 +0,0 @@ -Distance Transformation -======================= - -Transforms distances in a dataset. 
- -**Inputs** - -- Distances: distance matrix - -**Outputs** - -- Distances: transformed distance matrix - -The **Distance Transformation** widget is used for the normalization and inversion of distance matrices. The normalization of data is necessary to bring all the variables into proportion with one another. - -![](images/DistanceTransformation-stamped.png) - -1. Choose the type of [Normalization](https://en.wikipedia.org/wiki/Normalization_\(statistics\)): - - **No normalization** - - **To interval [0, 1]** - - **To interval [-1, 1]** - - [Sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function): 1/(1+exp(-X)) -2. Choose the type of Inversion: - - **No inversion** - - **-X** - - **1 - X** - - **max(X) - X** - - **1/X** -3. Produce a report. -4. After changing the settings, you need to click *Apply* to commit changes to other widgets. Alternatively, tick *Apply automatically*. - -Example -------- - -In the snapshot below, you can see how transformation affects the distance matrix. We loaded the *Iris* dataset and calculated the distances between rows with the help of the [Distances](../unsupervised/distances.md) widget. In order to demonstrate how **Distance Transformation** affects the [Distance Matrix](../unsupervised/distancematrix.md), we created the workflow below and compared the transformed distance matrix with the "original" one. - -![](images/DistanceTransformation-Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/hierarchicalclustering.md b/doc/visual-programming/source/widgets/unsupervised/hierarchicalclustering.md deleted file mode 100644 index 0689d358eb8..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/hierarchicalclustering.md +++ /dev/null @@ -1,59 +0,0 @@ -Hierarchical Clustering -======================= - -Groups items using a hierarchical clustering algorithm. 
- -**Inputs** - -- Distances: distance matrix - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether an instance is selected - -The widget computes [hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) of arbitrary types of objects from a matrix of distances and shows a corresponding [dendrogram](https://en.wikipedia.org/wiki/Dendrogram). Distances can be computed with the [Distances](../unsupervised/distances.md) widget - -![](images/Hierarchical-Clustering.png) - -1. The widget supports the following ways of measuring distances between clusters: - - **Single linkage** computes the distance between the closest elements of the two clusters - - **Average linkage** computes the average distance between elements of the two clusters - - **Weighted linkage** uses the [WPGMA](http://research.amnh.org/~siddall/methods/day1.html) method - - **Complete linkage** computes the distance between the clusters' most distant elements - - **Ward linkage** computes the increase of the error sum of squares. In other words, the [Ward's minimum variance criterion](https://en.wikipedia.org/wiki/Ward%27s_method) minimizes the total within-cluster variance. -2. Labels of nodes in the dendrogram can be chosen in the **Annotation** box. -3. Huge dendrograms can be pruned in the *Pruning* box by selecting the maximum depth of the dendrogram. This only affects the display, not the actual clustering. -4. The widget offers three different selection methods: - - **Manual** (Clicking inside the dendrogram will select a cluster. Multiple clusters can be selected by holding Ctrl/Cmd. Each selected cluster is shown in a different color and is treated as a separate cluster in the output.) - - **Height ratio** (Clicking on the bottom or top ruler of the dendrogram places a cutoff line in the graph. Items to the right of the line are selected.) - - **Top N** (Selects the number of top nodes.) -5. 
Use *Zoom* and scroll to zoom in or out. -6. The data can be automatically output on any change (*Send Automatically*) or, if the box isn't ticked, by pushing *Send Selection*. - -To output the cluster, click on the ruler at the top or the bottom of the visualization. This will create a cut-off for the clusters. - -Examples --------- - -#### Cluster selection and projections - -We start with the *Grades for English and Math* data set from the [Datasets](../data/datasets.md) widget. The data contains two numeric variables, grades for English and for Algebra. - -**Hierarchical Clustering** requires a distance matrix on the input. We compute it with [Distances](../unsupervised/distances.md), where we use the *Euclidean* distance metric. - -Once the data is passed to the hierarchical clustering, the widget displays a dendrogram, a tree-like clustering structure. Each node represents an instance in the data set, in our case a student. Tree nodes are labelled with student names. - -To create the clusters, we click on the ruler at the desired threshold. In this case, we chose three clusters. We pass those clusters to [MDS](../unsupervised/mds.md), which shows a 2D projection of data instances, colored by cluster label. - -![](images/Hierarchical-Example1.png) - -#### Cluster explanation - -In the second example, we continue with the *Grades for English and Math* data. Say we wish to explain what characterizes the cluster with Maya, George, Lea, and Phill. - -We select the cluster in the dendrogram and pass the entire data set to [Box Plot](../visualize/boxplot.md). Note that the connection here is *Data*, not *Selected Data*. To rewire the connection, double-click on it. - -In **Box Plot**, we set the *Selected* variable as the Subgroup. This will split the plot into selected data instances (our cluster) and the remaining data. Next, we use the *Order by relevance to subgroup* option, which sorts the variables according to how well they distinguish between subgroups.
It turns out, that our cluster contains students who are bad at math (they have low values of the Algebra variable). - -![](images/Hierarchical-Example2.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/correspondence-analysis.png b/doc/visual-programming/source/widgets/unsupervised/icons/correspondence-analysis.png deleted file mode 100644 index 0dfde060767..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/correspondence-analysis.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/distance-file.png b/doc/visual-programming/source/widgets/unsupervised/icons/distance-file.png deleted file mode 100644 index 16429c3b52b..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/distance-file.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/distance-map.png b/doc/visual-programming/source/widgets/unsupervised/icons/distance-map.png deleted file mode 100644 index 1c6ad541e8e..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/distance-map.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/distance-matrix.png b/doc/visual-programming/source/widgets/unsupervised/icons/distance-matrix.png deleted file mode 100644 index 833c3b636d6..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/distance-matrix.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/distance-transformation.png b/doc/visual-programming/source/widgets/unsupervised/icons/distance-transformation.png deleted file mode 100644 index c593dfa85a0..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/distance-transformation.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/distances.png 
b/doc/visual-programming/source/widgets/unsupervised/icons/distances.png deleted file mode 100644 index 0f0b464a1d8..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/distances.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/hierarchical-clustering.png b/doc/visual-programming/source/widgets/unsupervised/icons/hierarchical-clustering.png deleted file mode 100644 index e3aa9a0cdf0..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/hierarchical-clustering.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/k-means.png b/doc/visual-programming/source/widgets/unsupervised/icons/k-means.png deleted file mode 100644 index 5bfddfda34f..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/k-means.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/manifold-learning.png b/doc/visual-programming/source/widgets/unsupervised/icons/manifold-learning.png deleted file mode 100644 index 131bbcbf6de..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/manifold-learning.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/mds.png b/doc/visual-programming/source/widgets/unsupervised/icons/mds.png deleted file mode 100644 index 527ef3c2ba1..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/mds.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/pca.png b/doc/visual-programming/source/widgets/unsupervised/icons/pca.png deleted file mode 100644 index 16a4f2c31ff..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/pca.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/save-distance-matrix.png 
b/doc/visual-programming/source/widgets/unsupervised/icons/save-distance-matrix.png deleted file mode 100644 index 69f632ab250..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/save-distance-matrix.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/silhouette-plot.png b/doc/visual-programming/source/widgets/unsupervised/icons/silhouette-plot.png deleted file mode 100644 index 87caf0b4943..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/silhouette-plot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/som.png b/doc/visual-programming/source/widgets/unsupervised/icons/som.png deleted file mode 100644 index deda3197d40..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/som.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/icons/tSNE.png b/doc/visual-programming/source/widgets/unsupervised/icons/tSNE.png deleted file mode 100644 index 0f6631f4c6a..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/icons/tSNE.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis-Example.png deleted file mode 100644 index 6b51afe40c2..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis-stamped.png deleted file mode 100644 index ab9aac63363..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis-stamped.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis.png b/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis.png deleted file mode 100644 index 9a76b4785c6..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/CorrespondenceAnalysis.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DBSCAN-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/DBSCAN-Example.png deleted file mode 100644 index 72282b57755..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DBSCAN-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DBSCAN.png b/doc/visual-programming/source/widgets/unsupervised/images/DBSCAN.png deleted file mode 100644 index 8b6b0d02d23..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DBSCAN.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile-Example.png deleted file mode 100644 index 31ca5240862..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile-stamped.png deleted file mode 100644 index 16ffa6ed4c1..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile.png deleted file mode 100644 index 5c0dc8b24d5..00000000000 Binary files 
a/doc/visual-programming/source/widgets/unsupervised/images/DistanceFile.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Example.png deleted file mode 100644 index 1bb2a8b4bbd..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Example1.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Example1.png deleted file mode 100644 index d63d24d25bf..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Highlighted.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Highlighted.png deleted file mode 100644 index ba7b1eb314b..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-Highlighted.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-stamped.png deleted file mode 100644 index 8ed40fe9e77..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap.png deleted file mode 100644 index afccde1e1e5..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMap.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix-Example.png 
b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix-Example.png deleted file mode 100644 index 0140e13b7a8..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix-stamped.png deleted file mode 100644 index 00636bf13c3..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix.png deleted file mode 100644 index cb7b3af9507..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceMatrix.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation-Example.png deleted file mode 100644 index 1f2fbcdf690..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation-stamped.png deleted file mode 100644 index f3a6a52bc33..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation.png b/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation.png deleted file mode 100644 index 5d6b3b704ca..00000000000 Binary files 
a/doc/visual-programming/source/widgets/unsupervised/images/DistanceTransformation.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example1-columns.png b/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example1-columns.png deleted file mode 100644 index 00d74b3c1b4..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example1-columns.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example1-rows.png b/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example1-rows.png deleted file mode 100644 index c4b23b66599..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example1-rows.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example2.png b/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example2.png deleted file mode 100644 index 77180d3cc66..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Distances-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Distances-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/Distances-stamped.png deleted file mode 100644 index 550930eee41..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Distances-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Clustering.png b/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Clustering.png deleted file mode 100644 index 1f8480f711e..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Clustering.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Example1.png b/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Example1.png deleted file mode 100644 index c7f6da2e671..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Example2.png b/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Example2.png deleted file mode 100644 index 56d0b6cdbe2..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Hierarchical-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/LouvainClustering-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/LouvainClustering-Example.png deleted file mode 100644 index 1c4c75a7e52..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/LouvainClustering-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/LouvainClustering.png b/doc/visual-programming/source/widgets/unsupervised/images/LouvainClustering.png deleted file mode 100644 index 0d5e911f700..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/LouvainClustering.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/MDS-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/MDS-Example.png deleted file mode 100644 index 37908e26503..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/MDS-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/MDS-zoo-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/MDS-zoo-stamped.png deleted file mode 100644 index cbfdb86f565..00000000000 Binary files 
a/doc/visual-programming/source/widgets/unsupervised/images/MDS-zoo-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/MDS-zoo.png b/doc/visual-programming/source/widgets/unsupervised/images/MDS-zoo.png deleted file mode 100644 index 419e1d1c70c..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/MDS-zoo.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/PCA-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/PCA-stamped.png deleted file mode 100644 index 181a6440fbf..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/PCA-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/PCA.png b/doc/visual-programming/source/widgets/unsupervised/images/PCA.png deleted file mode 100644 index c11d8961f54..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/PCA.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/PCAExample.png b/doc/visual-programming/source/widgets/unsupervised/images/PCAExample.png deleted file mode 100644 index 530b01d5e06..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/PCAExample.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/PCAExample2.png b/doc/visual-programming/source/widgets/unsupervised/images/PCAExample2.png deleted file mode 100644 index 9f2dab3679d..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/PCAExample2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix-Example.png deleted file mode 100644 index b25dfee3d52..00000000000 Binary files 
a/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix-stamped.png deleted file mode 100644 index 584152e1e58..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix.png b/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix.png deleted file mode 100644 index eab333598fd..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/SaveDistanceMatrix.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Self-Organizing_Map-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/Self-Organizing_Map-stamped.png deleted file mode 100644 index e95e856d372..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Self-Organizing_Map-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/Self-Organizing_Map_Example.png b/doc/visual-programming/source/widgets/unsupervised/images/Self-Organizing_Map_Example.png deleted file mode 100644 index b5fc5f4344b..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/Self-Organizing_Map_Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot-Example.png b/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot-Example.png deleted file mode 100644 index dade8110ac3..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot-Example.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot-stamped.png deleted file mode 100644 index 4b0c560f822..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot.png b/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot.png deleted file mode 100644 index a4597123ef9..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/SilhouettePlot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/collage-manifold.png b/doc/visual-programming/source/widgets/unsupervised/images/collage-manifold.png deleted file mode 100644 index c1689aebe60..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/collage-manifold.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/kMeans-Example1.png b/doc/visual-programming/source/widgets/unsupervised/images/kMeans-Example1.png deleted file mode 100644 index 6899f19df92..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/kMeans-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/kMeans-Example2.png b/doc/visual-programming/source/widgets/unsupervised/images/kMeans-Example2.png deleted file mode 100644 index 8e6db17aa07..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/kMeans-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/kMeans-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/kMeans-stamped.png deleted file mode 100644 index 00f4e1ba71a..00000000000 Binary files 
a/doc/visual-programming/source/widgets/unsupervised/images/kMeans-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/manifold-learning-example.png b/doc/visual-programming/source/widgets/unsupervised/images/manifold-learning-example.png deleted file mode 100644 index 9625be820c1..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/manifold-learning-example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/manifold-learning-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/manifold-learning-stamped.png deleted file mode 100644 index 149ba2c3d4c..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/manifold-learning-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/tSNE-Example1.png b/doc/visual-programming/source/widgets/unsupervised/images/tSNE-Example1.png deleted file mode 100644 index 5eb220eb367..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/tSNE-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/tSNE-Example2.png b/doc/visual-programming/source/widgets/unsupervised/images/tSNE-Example2.png deleted file mode 100644 index 95102edc69f..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/tSNE-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/images/tSNE-stamped.png b/doc/visual-programming/source/widgets/unsupervised/images/tSNE-stamped.png deleted file mode 100644 index 6671375e014..00000000000 Binary files a/doc/visual-programming/source/widgets/unsupervised/images/tSNE-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/unsupervised/kmeans.md b/doc/visual-programming/source/widgets/unsupervised/kmeans.md deleted file mode 
100644 index 5e9a13edf59..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/kmeans.md +++ /dev/null @@ -1,54 +0,0 @@ -k-Means -======= - -Groups items using the k-Means clustering algorithm. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with cluster label as a meta attribute -- Centroids: table with initial centroid coordinates - -The widget applies the [k-Means clustering](https://en.wikipedia.org/wiki/K-means_clustering) algorithm to the data and outputs a new dataset in which the cluster label is added as a meta attribute. Silhouette scores of clustering results for various k are also shown in the widget. When using the silhouette score option, the higher the silhouette score, the better the clustering. - -![](images/kMeans-stamped.png) - -1. Select the number of clusters. - - **Fixed**: algorithm clusters data to a specified number of clusters. - - **From X to Y**: widget shows clustering scores for the selected cluster range using the [Silhouette](https://en.wikipedia.org/wiki/Silhouette_\(clustering\)) score (contrasts average distance to elements in the same cluster with the average distance to elements in other clusters). -2. **Preprocessing**: If the option is selected, columns are normalized (mean centered to 0 and standard deviation scaled to 1). -3. Initialization method (the way the algorithm begins clustering): - - [k-Means++](https://en.wikipedia.org/wiki/K-means%2B%2B) (first center is selected randomly, subsequent centers are chosen from the remaining points with probability proportional to the squared distance from the closest center) - - **Random initialization** (clusters are assigned randomly at first and then updated with further iterations) - - **Re-runs** (how many times the algorithm is run from random initial positions; the result with the lowest within-cluster sum of squares will be used) and **Maximum iterations** (the maximum number of iterations within each algorithm run) can be set manually.
- -Preprocessing -------------- - -k-Means uses default preprocessing if necessary. It executes it in the following order: - -- continuizes categorical variables (with one feature per value) -- imputes missing values with mean values - -To override default preprocessing, preprocess the data beforehand with [Preprocess](../data/preprocess.md) widget. - -Examples --------- - -First, we load the *Iris* dataset, run k-Means with three clusters, and show it in the [Scatter Plot](../visualize/scatterplot.md). To interactively explore the clusters, we can use [Select Rows](../data/selectrows.md) to select the cluster of interest (say, C1) and plot it in the scatter plot using interactive data analysis. That means if we pass a subset to the scatter plot, the subset will be exposed in the plot. - -Try the same procedure for 2 or 4 clusters or explore different clusters in the plot (C2, C3). - -![](images/kMeans-Example1.png) - -But as we used silhouette score to estimate our cluster quality, we can plot the clusters in the [Silhouette Plot](../visualize/silhouetteplot.md) to observe inliers and outliers. Place Silhouette Plot in place of Select Rows. - -Silhouette Plot shows silhouette scores for individual data instances. High, positive scores represent instances that are highly representative of the clusters, while negative scores represent instances that are outliers (don't fit well with the cluster). Select negative scores from the green cluster C3 and plot them in a scatter plot as a subset. - -It seems like these are mostly iris versicolors, which are bordering the iris virginica region. Note that the green color of the cluster C3 doesn't coincide with the green color of the iris labels - these are two different things. 
- -![](images/kMeans-Example2.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/louvainclustering.md b/doc/visual-programming/source/widgets/unsupervised/louvainclustering.md deleted file mode 100644 index f9a3b57dc48..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/louvainclustering.md +++ /dev/null @@ -1,54 +0,0 @@ -Louvain Clustering -================== - -Groups items using the Louvain clustering algorithm. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Data: dataset with cluster label as a meta attribute -- Graph (with the Network addon): the weighted k-nearest neighbor graph - -The widget first converts the input data into a k-nearest neighbor graph. To preserve the notions of distance, the Jaccard index for the number of shared neighbors is used to weight the edges. Finally, a [modularity optimization](https://en.wikipedia.org/wiki/Louvain_Modularity) community detection algorithm is applied to the graph to retrieve clusters of highly interconnected nodes. The widget outputs a new dataset in which the cluster label is used as a meta attribute. - -![](images/LouvainClustering.png) - -1. Information on the number of clusters found. -2. **Preprocessing**: - - *Normalize data*: Center to mean and scale to standard deviation of 1. - - *Apply PCA preprocessing*: PCA processing is typically applied to the original data to remove noise (see [PCA](PCA.md) widget). - - *PCA Components*: number of principal components used. -3. **Graph parameters**: - - *Distance metric*: The distance metric is used for finding specified number of nearest neighbors (Euclidean, Manhattan, Cosine). - - *k neighbors*: The number of nearest neighbors to use to form the KNN graph. - - *Resolution* is a parameter for the Louvain community detection algorithm that affects the size of the recovered clusters. 
Smaller resolutions recover smaller clusters and therefore a larger number of them, while, conversely, larger values recover clusters containing more data points. -4. When *Apply Automatically* is ticked, the widget will automatically communicate all changes. Alternatively, click *Apply*. - -Preprocessing -------------- - -Louvain Clustering uses default preprocessing if necessary. It executes it in the following order: - -- continuizes categorical variables (with one feature per value) -- imputes missing values with mean values - -To override default preprocessing, preprocess the data beforehand with [Preprocess](../data/preprocess.md) widget. - -Example -------- - -*Louvain Clustering* converts the dataset into a graph, where it finds highly interconnected nodes. In the example below, we used the iris data set from the [File](../data/file.md) widget, then passed it to **Louvain Clustering**, which found 4 clusters. We plotted the data with [Scatter Plot](../visualize/scatterplot.md), where we colored the data points according to cluster labels. - -![](images/LouvainClustering-Example.png) - -We can visualize the graph itself using the **Network Explorer** from the Network addon. - -References ----------- - -Blondel, Vincent D., et al. "[Fast unfolding of communities in large networks.](https://arxiv.org/abs/0803.0476)" Journal of statistical mechanics: theory and experiment 2008.10 (2008): P10008. - -Lambiotte, Renaud, J-C. Delvenne, and Mauricio Barahona. "Laplacian dynamics and multiscale modular structure in networks." arXiv preprint, [arXiv:0812.1770](https://arxiv.org/abs/0812.1770) (2008).
diff --git a/doc/visual-programming/source/widgets/unsupervised/manifoldlearning.md b/doc/visual-programming/source/widgets/unsupervised/manifoldlearning.md deleted file mode 100644 index 98de6bc73c6..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/manifoldlearning.md +++ /dev/null @@ -1,74 +0,0 @@ -Manifold Learning -================= - -Nonlinear dimensionality reduction. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Transformed Data: dataset with reduced coordinates - -[Manifold Learning](https://en.wikipedia.org/wiki/Nonlinear_dimensionality_reduction) is a technique which finds a non-linear manifold within the higher-dimensional space. The widget then outputs new coordinates which correspond to a two-dimensional space. Such data can be later visualized with [Scatter Plot](../visualize/scatterplot.md) or other visualization widgets. - -![](images/manifold-learning-stamped.png) - -1. Method for manifold learning: - - [t-SNE](http://scikit-learn.org/stable/modules/manifold.html#t-distributed-stochastic-neighbor-embedding-t-sne) - - [MDS](http://scikit-learn.org/stable/modules/manifold.html#multi-dimensional-scaling-mds), see also [MDS widget](../unsupervised/mds.md) - - [Isomap](http://scikit-learn.org/stable/modules/manifold.html#isomap) - - [Locally Linear Embedding](http://scikit-learn.org/stable/modules/manifold.html#locally-linear-embedding) - - [Spectral Embedding](http://scikit-learn.org/stable/modules/manifold.html#spectral-embedding) -2. 
Set parameters for the method: - - t-SNE (distance measures): - - *Euclidean* distance - - *Manhattan* - - *Chebyshev* - - *Jaccard* - - *Mahalanobis* - - *Cosine* - - MDS (iterations and initialization): - - *max iterations*: maximum number of optimization iterations - - *initialization*: method for initialization of the algorithm (PCA or random) - - Isomap: - - number of *neighbors* - - Locally Linear Embedding: - - *method*: - - standard - - modified - - [hessian eigenmap](http://scikit-learn.org/stable/modules/manifold.html#hessian-eigenmapping) - - local - - number of *neighbors* - - *max iterations* - - Spectral Embedding: - - *affinity*: - - nearest neighbors - - RBF kernel -3. Output: the number of reduced features (components). -4. If *Apply automatically* is ticked, changes will be propagated automatically. Alternatively, click *Apply*. -5. Produce a report. - -**Manifold Learning** widget produces different embeddings for high-dimensional data. - -![](images/collage-manifold.png) - -From left to right, top to bottom: t-SNE, MDS, Isomap, Locally Linear Embedding and Spectral Embedding. - -Preprocessing -------------- - -All projections use default preprocessing if necessary. It is executed in the following order: - -- continuization of categorical variables (with one feature per value) -- imputation of missing values with mean values - -To override default preprocessing, preprocess the data beforehand with [Preprocess](../data/preprocess.md) widget. - -Example -------- - -*Manifold Learning* widget transforms high-dimensional data into a lower dimensional approximation. This makes it great for visualizing datasets with many features. We used *voting.tab* to map 16-dimensional data onto a 2D graph. Then we used [Scatter Plot](../visualize/scatterplot.md) to plot the embeddings.
- -![](images/manifold-learning-example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/mds.md b/doc/visual-programming/source/widgets/unsupervised/mds.md deleted file mode 100644 index 238a724b65c..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/mds.md +++ /dev/null @@ -1,70 +0,0 @@ -MDS -=== - -Multidimensional scaling (MDS) projects items onto a plane fitted to given distances between points. - -**Inputs** - -- Data: input dataset -- Distances: distance matrix -- Data Subset: subset of instances - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: dataset with MDS coordinates - -[Multidimensional scaling](https://en.wikipedia.org/wiki/Multidimensional_scaling) is a technique which finds a low-dimensional (in our case a two-dimensional) projection of points, where it tries to fit distances between points as well as possible. The perfect fit is typically impossible to obtain since the data is high-dimensional or the distances are not [Euclidean](https://en.wikipedia.org/wiki/Euclidean_distance). - -In the input, the widget needs either a dataset or a matrix of distances. When visualizing distances between rows, you can also adjust the color of the points, change their shape, mark them, and output them upon selection. - -The algorithm iteratively moves the points around in a kind of a simulation of a physical model: if two points are too close to each other (or too far away), there is a force pushing them apart (or together). The change of the point’s position at each time interval corresponds to the sum of forces acting on it. - -![](images/MDS-zoo-stamped.png) - -1. The widget redraws the projection during optimization. Optimization is run automatically in the beginning and later by pushing *Start*. - - **Max iterations**: The optimization stops either when the projection changes only minimally at the last iteration or when a maximum number of iterations has been reached. 
- - **Initialization**: PCA (Torgerson) positions the initial points along principal coordinate axes. *Random* sets the initial points to a random position and then readjusts them. - - **Refresh**: Set how often you want to refresh the visualization. It can be at *Every iteration*, *Every 5/10/25/50 steps* or never (*None*). Setting a lower refresh interval makes the animation more visually appealing, but can be slow if the number of points is high. -2. Defines how the points are visualized. These options are available only when visualizing distances between rows (selected in the [Distances](../unsupervised/distances.md) widget). - - **Color**: Color of points by attribute (gray for continuous, colored for discrete). - - **Shape**: Shape of points by attribute (only for discrete). - - **Size**: Set the size of points (*Same size* or select an attribute) or let the size depend on the value of the continuous attribute the point represents (Stress). - - **Label**: Discrete attributes can serve as a label. - - **Symbol size**: Adjust the size of the dots. - - **Symbol opacity**: Adjust the transparency level of the dots. - - **Show similar pairs**: Adjust the strength of network lines. - - **Jitter**: Set [jittering](https://en.wikipedia.org/wiki/Jitter) to prevent the dots from overlapping. -3. Adjust the graph with *Zoom/Select*. The arrow enables you to select data instances. The magnifying glass enables zooming, which can be also done by scrolling in and out. The hand allows you to move the graph around. The rectangle readjusts the graph proportionally. -4. Select the desired output: - - **Original features only** (input dataset) - - **Coordinates only** (MDS coordinates) - - **Coordinates as features** (input dataset + MDS coordinates as regular attributes) - - **Coordinates as meta attributes** (input dataset + MDS coordinates as meta attributes) -5. Sending the instances can be automatic if *Send selected automatically* is ticked. 
Alternatively, click *Send selected*. -6. **Save Image** allows you to save the created image either as .svg or .png file to your device. -7. Produce a report. - -The MDS graph performs many of the functions of the Visualizations widget. It is in many respects similar to the [Scatter Plot](../visualize/scatterplot.md) widget, so we recommend reading that widget's description as well. - -Preprocessing -------------- - -When given *Distances* on the input, preprocessing is not applied. When given *Data*, MDS uses default preprocessing if necessary. Preprocessing is executed in the following order: - -- continuizes categorical variables (with one feature per value) -- imputes missing values with mean values - -To override default preprocessing, preprocess the data beforehand with [Preprocess](../data/preprocess.md) widget. - -# Example - -The above graphs were drawn using the following simple schema. We used the *iris.tab* dataset. Using the [Distances](../unsupervised/distances.md) widget we input the distance matrix into the **MDS** widget, where we see the *Iris* data displayed in a 2-dimensional plane. We can see the appended coordinates in the [Data Table](../data/datatable.md) widget. - -![](images/MDS-Example.png) - -# References - -Wickelmaier, F. (2003). An Introduction to MDS. Sound Quality Research -Unit, Aalborg University. Available -[here](https://homepages.uni-tuebingen.de/florian.wickelmaier/pubs/Wickelmaier2003SQRU.pdf). diff --git a/doc/visual-programming/source/widgets/unsupervised/savedistancematrix.md b/doc/visual-programming/source/widgets/unsupervised/savedistancematrix.md deleted file mode 100644 index 7e735f007b6..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/savedistancematrix.md +++ /dev/null @@ -1,22 +0,0 @@ -Save Distance Matrix -==================== - -Saves a distance matrix. - -If the file is saved to the same directory as the workflow or in the subtree of that directory, the widget remembers the relative path. 
Otherwise it will store an absolute path, but disable auto save for security reasons. - -**Inputs** - -- Distances: distance matrix - -![](images/SaveDistanceMatrix-stamped.png) - -1. By clicking *Save*, you choose from previously saved distance matrices. Alternatively, tick the box on the left side of the *Save* button and changes will be communicated automatically. -2. By clicking *Save as*, you save the distance matrix to your computer, you only need to enter the name of the file and click *Save*. - -Example -------- - -In the snapshot below, we used the [Distance Transformation](../unsupervised/distancetransformation.md) widget to transform the distances in the *Iris* dataset. We then chose to save the transformed version to our computer, so we could use it later on. We decided to output all data instances. You can choose to output just a minor subset of the data matrix. Pairs are marked automatically. If you wish to know what happened to our changed file, see [Distance File](../unsupervised/distancefile.md). - -![](images/SaveDistanceMatrix-Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/selforganizingmap.md b/doc/visual-programming/source/widgets/unsupervised/selforganizingmap.md deleted file mode 100644 index b5944e2e6c5..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/selforganizingmap.md +++ /dev/null @@ -1,37 +0,0 @@ -Self-Organizing Map -=================== - -Computation of a self-organizing map. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -A [self-organizing map (SOM)](https://en.wikipedia.org/wiki/Self-organizing_map) is a type of artificial neural network (ANN) that is trained using unsupervised learning to produce a two-dimensional, discretized representation of the data. It is a method to do dimensionality reduction. 
Self-organizing maps use a neighborhood function to preserve the topological properties of the input space. - -The points in the grid represent data instances. By default, the size of the point corresponds to the number of instances represented by the point. The points are colored by majority class (if available), while the intensity of interior color shows the proportion of majority class. To see the class distribution, select *Show pie charts* option. - -Just like other visualization widgets, **Self-Organizing Maps** also supports interactive selection of groups. Use Shift key to select a new group and Ctrl+Shift to add to the existing group. - -![](images/Self-Organizing_Map-stamped.png) - -1. SOM properties: - - Set the grid type. Options are hexagonal or square grid. - - If *Set dimensions automatically* is checked, the size of the plot will be set automatically. Alternatively, set the size manually. - - Set the initialization type for the SOM projection. Options are PCA initialization, random initialization and replicable random (random_seed = 0). - - Once the parameters are set, press *Start* to re-run the optimization. -2. Set the color of the instances in the plot. The widget colors by class by default (if available). - - *Show pie charts* turns points into pie-charts that show the distributions of the values used for coloring. - - *Size by number of instances* scales the points according to the number of instances represented by the point. - -Example -------- - -Self-organizing maps are low-dimensional projections of the input data. We will use the *brown-selected* data and display the data instance in a 2-D projection. Seems like the three gene types are well-separated. We can select a subset from the grid and display it in a Data Table.
- -![](images/Self-Organizing_Map_Example.png) diff --git a/doc/visual-programming/source/widgets/unsupervised/tsne.md b/doc/visual-programming/source/widgets/unsupervised/tsne.md deleted file mode 100644 index 23e4bec86d8..00000000000 --- a/doc/visual-programming/source/widgets/unsupervised/tsne.md +++ /dev/null @@ -1,68 +0,0 @@ -t-SNE -===== - -Two-dimensional data projection with t-SNE. - -**Inputs** - -- Data: input dataset -- Distances: distance matrix -- Data Subset: subset of instances - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with t-SNE coordinates and an additional column showing whether a point is selected - -The **t-SNE** widget creates a visualization using t-distributed stochastic neighbor embedding (t-SNE). [t-SNE](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) is a dimensionality reduction technique, similar to MDS, where points are mapped to 2-D space by their probability distribution. - -The widget accepts either a data table or a distance matrix as input. If a data table is provided, the widget will apply the chosen preprocessing option, then calculate distances internally. - -![](images/tSNE-stamped.png) - -1. Preprocessing is applied before t-SNE computes the distances between data points in the dataset. These parameters are ignored when the *Distances* input is provided. - - *Normalize data*: We can apply standardization before running PCA. Standardization normalizes each column by subtracting the column mean and dividing by the standard deviation. - - *Apply PCA preprocessing*: For datasets with large numbers of features, e.g. 100 or 1,000, or highly correlated variables, we can apply PCA preprocessing to speed up the algorithm and decorrelate the data. - - *PCA components*: the number of principal components to use when applying PCA preprocessing. - -2. Optimization parameters. 
The parameters are explained in-depth [here](https://opentsne.readthedocs.io/en/latest/parameters.html): - - *Initialization*: PCA positions the initial points along principal coordinate axes. Spectral initialization calculates the spectral embedding of t-SNE's affinity matrix. Only spectral initialization is supported when using precomputed distance matrices. - - *Distance metric*: The distance metric to be used when calculating distances between data points. This setting is ignored when a precomputed distance matrix is provided. - - [*Perplexity*](http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html): Roughly speaking, perplexity can be interpreted as the number of nearest neighbors to which distances will be preserved. Using smaller values can reveal small, local clusters, while using large values tends to reveal the broader, global relationships between data points. - - *Preserve global structure*: this option will combine two different perplexity values (50 and 500) to try to preserve both the local and global structure. - - *Exaggeration*: this parameter increases the attractive forces between points, and can directly be used to control the compactness of clusters. Increasing exaggeration may also better highlight the global structure of the data. t-SNE with exaggeration set to 4 is roughly equal to UMAP. - - Press Start to (re-)run the optimization. -3. Set the color of the displayed points. Set shape, size and label to differentiate between points. If *Label only selection and subset* is ticked, only selected and/or highlighted points will be labelled. -4. Set symbol size and opacity for all data points. Set jittering to randomly disperse data points. -5. *Show color regions* colors the graph by class, while *Show legend* displays a legend on the right. Click and drag the legend to move it. -6. *Select, zoom, pan and zoom to fit* are the options for exploring the graph.
The manual selection of data instances works as an angular/square selection tool. Double click to move the projection. Scroll in or out for zoom. -7. If *Send selected automatically* is ticked, changes are communicated automatically. Alternatively, press *Send Selected*. - -Preprocessing -------------- - -If necessary, t-SNE applies the following preprocessing steps by default, in the following order: - -- continuizes categorical variables (with one feature per value) -- imputes missing values with mean values - -To override default preprocessing, preprocess the data beforehand with [Preprocess](../data/preprocess.md) widget. - -The "Preprocessing" section also contains user-controllable options that are applied to a data table before distances are computed. - -If a distance matrix is provided as input, preprocessing is not applied. - -Examples --------- - -The first example is a simple t-SNE plot of *brown-selected* data set. Load *brown-selected* with the [File](../data/file.md) widget. Then connect **t-SNE** to it. The widget will show a 2D map of yeast samples, where samples with similar gene expression profiles will be close together. Select the region, where the gene function is mixed and inspect it in a [Data Table](../data/datatable.md). - -![](images/tSNE-Example1.png) - -For the second example, use [Single Cell Datasets](https://orangedatamining.com/widget-catalog/single-cell/single_cell_datasets/) widget from the Single Cell add-on to load *Bone marrow mononuclear cells with AML (sample)* data. We can use t-SNE to visualize the dataset. The t-SNE visualization shows that there indeed appear to be clusters of cells in our dataset. - -Let's try to determine which cluster of cells corresponds to natural killer cells (NK cells). The *Marker Genes* widget from the Single Cell add-on contains collections of known marker genes for different cell types. Select the markers for NK cells. 
- -We can then score how much each of our cells corresponds to these marker genes using the *Score Cells* widget. We can then visualize the result in our t-SNE plot. We color the points and determine their size according to the computed *Score*. The brightly-colored, larger points correspond to cells that had high expression values of our marker genes. We can conclude that this, upper-left cluster of cells corresponds to NK cells. - -![](images/tSNE-Example2.png) diff --git a/doc/visual-programming/source/widgets/visualize/barplot.md b/doc/visual-programming/source/widgets/visualize/barplot.md deleted file mode 100644 index a57f847781b..00000000000 --- a/doc/visual-programming/source/widgets/visualize/barplot.md +++ /dev/null @@ -1,34 +0,0 @@ -Bar Plot -======== - -Visualizes comparisons among discrete categories. - -**Inputs** - -- Data: input dataset -- Data Subset: subset of instances - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -The **Bar Plot** widget visualizes numeric variables and compares them by a categorical variable. The widget is useful for observing outliers, distributions within groups, and comparing categories. - -![](images/Bar-Plot-stamped.png) - -1. Parameters of the plot. Values are the numeric variable to plot. Group by is the variable for grouping the data. Annotations are categorical labels below the plot. Color is the categorical variable whose values are used for coloring the bars. -2. *Select, zoom, pan and zoom to fit* are the options for exploring the graph. The manual selection of data instances works as an angular/square selection tool. Double click to move the projection. Scroll in or out for zoom. -3. If *Send automatically* is ticked, changes are communicated automatically. Alternatively, press *Send*. -4. Access help, save image, produce a report, or adjust visual settings. 
On the right, the information on input and output are shown. - -Example -------- - -The **Bar Plot** widget is most commonly used immediately after the [File](../data/file.md) widget to compare categorical values. In this example, we have used *heart-disease* data to inspect our variables. - -![](images/Bar-Plot-Example.png) - -First, we have observed cholesterol values of patient from our data set. We grouped them by diameter narrowing, which defines patients with a heart disease (1) and those without (0). We use the same variable for coloring the bars. - -Then, we selected patients over 60 years of age with [Select Rows](../data/selectrows.md). We sent the subset to **Bar Plot** to highlight these patients in the widget. The big outlier with a high cholesterol level is apparently over 60 years old. diff --git a/doc/visual-programming/source/widgets/visualize/boxplot.md b/doc/visual-programming/source/widgets/visualize/boxplot.md deleted file mode 100644 index 8086998b87a..00000000000 --- a/doc/visual-programming/source/widgets/visualize/boxplot.md +++ /dev/null @@ -1,52 +0,0 @@ -Box Plot -======== - -Shows distribution of attribute values. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -The **Box Plot** widget shows the distributions of attribute values. It is a good practice to check any new data with this widget to quickly discover any anomalies, such as duplicated values (e.g., gray and grey), outliers, and alike. Bars can be selected - for example, values for categorical data or the quantile range for numeric data. - -![](images/BoxPlot-Continuous.png) - -1. Select the variable you want to plot. Tick *Order by relevance to subgroups* to order variables by Chi2 or ANOVA over the selected subgroup. -2. Choose *Subgroups* to see [box plots](https://en.wikipedia.org/wiki/Box_plot) displayed by a discrete subgroup. 
Tick *Order by relevance to variable* to order subgroups by Chi2 or ANOVA over the selected variable. -3. When instances are grouped by a subgroup, you can change the display mode. Annotated boxes will display the end values, the mean and the median, while comparing medians and compare means will, naturally, compare the selected value between subgroups. -![continuous](images/BoxPlot-Continuous-small.png) -4. The mean (the dark blue vertical line). The thin blue line represents the [standard deviation](http://mathworld.wolfram.com/StandardDeviation.html). -5. Values of the first (25%) and the third (75%) quantile. The blue highlighted area represents the values between the first and the third quartile. -6. The median (yellow vertical line). - -For discrete attributes, the bars represent the number of instances with each particular attribute value. The plot shows the number of different animal types in the *Zoo* dataset: there are 41 mammals, 13 fish, 20 birds, and so on. - -Display shows: -- *Stretch bars*: Shows relative values (proportions) of data instances. The unticked box shows absolute values. -- *Show box labels*: Display discrete values above each bar. -- *Sort by subgroup frequencies*: Sort subgroups by their descending frequency. - -![](images/BoxPlot-Discrete.png) - -Note ----- - -Ordering by relevance to subgroups computes ANOVA-based p-values for all variables, including the binary, in order to put them on the same scale. When showing significance in the graph, binary variables are treated differently: the widget computes the t-test with correction for unequal variance, which is more appropriate than ANOVA. These p-values may differ from those used in the ordering the variables in the lists on the left. - -Examples --------- - -The **Box Plot** widget is most commonly used immediately after the [File](../data/file.md) widget to observe the statistical properties of a dataset. 
In the first example, we have used *heart-disease* data to inspect our variables. - -![](images/BoxPlot-Example1.png) - -**Box Plot** is also useful for finding the properties of a specific dataset, for instance, a set of instances manually defined in another widget (e.g. [Scatter Plot](../visualize/scatterplot.md)) or instances belonging to some cluster or a classification tree node. Let us now use *zoo* data and create a typical clustering workflow with [Distances](../unsupervised/distances.md) and [Hierarchical Clustering](../unsupervised/hierarchicalclustering.md). - -Now define the threshold for cluster selection (click on the ruler at the top). Connect **Box Plot** to **Hierarchical Clustering**, tick *Order by relevance*, and select *Cluster* as a subgroup. This will order attributes by how well they define the selected subgroup, in our case, a cluster. It seems like our clusters indeed correspond very well with the animal type! - -![](images/BoxPlot-Example2.png) diff --git a/doc/visual-programming/source/widgets/visualize/cn2ruleviewer.md b/doc/visual-programming/source/widgets/visualize/cn2ruleviewer.md deleted file mode 100644 index 089f653fa2e..00000000000 --- a/doc/visual-programming/source/widgets/visualize/cn2ruleviewer.md +++ /dev/null @@ -1,32 +0,0 @@ -CN2 Rule Viewer -=============== - -CN2 Rule Viewer - -**Inputs** - -- Data: dataset to filter -- CN2 Rule Classifier: CN2 Rule Classifier, including a list of induced rules - -**Outputs** - -- Filtered Data: data instances covered by all selected rules - -A widget that displays [CN2 classification](https://en.wikipedia.org/wiki/CN2_algorithm) rules. If data is also connected, upon rule selection, one can analyze which instances abide by the conditions. - -![](images/CN2RuleViewer-stamped.png) - -1. Original order of induced rules can be restored. -2. When rules are many and complex, the view can appear packed.
For this reason, *compact view* was implemented, which allows a flat presentation and a cleaner inspection of rules. -3. Click *Report* to bring up a detailed description of the rule induction algorithm and its parameters, the data domain, and induced rules. - -Additionally, upon selection, rules can be copied to clipboard by pressing the default system shortcut (ctrl+C, cmd+C). - -Examples --------- - -In the schema below, the most common use of the widget is presented. First, the data is read and a CN2 rule classifier is trained. We are using *titanic* dataset for the rule construction. The rules are then viewed using the [Rule Viewer](../visualize/cn2ruleviewer.md). To explore different CN2 algorithms and understand how adjusting parameters influences the learning process, **Rule Viewer** should be kept open and in sight, while setting the CN2 learning algorithm (the presentation will be updated promptly). - -![](images/CN2-Viewer-Example1.png) - -Selecting a rule outputs filtered data instances. These can be viewed in a [Data Table](../data/datatable.md). diff --git a/doc/visual-programming/source/widgets/visualize/distributions.md b/doc/visual-programming/source/widgets/visualize/distributions.md deleted file mode 100644 index 4778388c485..00000000000 --- a/doc/visual-programming/source/widgets/visualize/distributions.md +++ /dev/null @@ -1,41 +0,0 @@ -Distributions -============= - -Displays value distributions for a single attribute. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether an instance is selected -- Histogram Data: bins and instance counts from the histogram - -The **Distributions** widget displays the [value distribution](https://en.wikipedia.org/wiki/Frequency_distribution) of discrete or continuous attributes. If the data contains a class variable, distributions may be conditioned on the class. 
- -The graph shows how many times (e.g., in how many instances) each attribute value appears in the data. If the data contains a class variable, class distributions for each of the attribute values will be displayed (like in the snapshot below). To create this graph, we used the *Zoo* dataset. - -![](images/Distributions-Discrete.png) - -1. A list of variables for display. *Sort categories by frequency* orders displayed values by frequency. -2. Set *Bin width* with the slider. Precision scale is set to sensible intervals. *Fitted distribution* fits selected distribution to the plot. Options are [Normal](https://en.wikipedia.org/wiki/Normal_distribution), [Beta](https://en.wikipedia.org/wiki/Beta_distribution), [Gamma](https://en.wikipedia.org/wiki/Gamma_distribution), [Rayleigh](https://en.wikipedia.org/wiki/Rayleigh_distribution), [Pareto](https://en.wikipedia.org/wiki/Pareto_distribution), [Exponential](https://en.wikipedia.org/wiki/Exponential_distribution), [Kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation). -3. Columns: - -- *Split by* displays value distributions for instances of a certain class. -- *Stack columns* displays one column per bin, colored by proportions of class values. -- *Show probabilities* shows probabilities of class values at selected variable. -- *Show cumulative distribution* cumulatively stacks frequencies. - -4. If *Apply Automatically* is ticked, changes are communicated automatically. Alternatively, click *Apply*. - -For continuous attributes, the attribute values are also displayed as a histogram. It is possible to fit various distributions to the data, for example, a Gaussian kernel density estimation. *Hide bars* hides histogram bars and shows only distribution (old behavior of Distributions). - -For this example, we used the *Iris* dataset. - -![](images/Distributions-Continuous.png) - -In class-less domains, the bars are displayed in blue. We used the *Housing* dataset. 
- -![](images/Distributions-NoClass.png) \ No newline at end of file diff --git a/doc/visual-programming/source/widgets/visualize/freeviz.md b/doc/visual-programming/source/widgets/visualize/freeviz.md deleted file mode 100644 index b88a19747c5..00000000000 --- a/doc/visual-programming/source/widgets/visualize/freeviz.md +++ /dev/null @@ -1,55 +0,0 @@ -FreeViz -======= - -Displays FreeViz projection. - -**Inputs** - -- Data: input dataset -- Data Subset: subset of instances - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected -- Components: FreeViz vectors - -**FreeViz** uses a paradigm borrowed from particle physics: points in the same class attract each other, those from different class repel each other, and the resulting forces are exerted on the anchors of the attributes, that is, on unit vectors of each of the dimensional axis. The points cannot move (are projected in the projection space), but the attribute anchors can, so the optimization process is a hill-climbing optimization where at the end the anchors are placed such that forces are in equilibrium. The button Optimize is used to invoke the optimization process. The result of the optimization may depend on the initial placement of the anchors, which can be set in a circle, arbitrary or even manually. The later also works at any stage of optimization, and we recommend to play with this option in order to understand how a change of one anchor affects the positions of the data points. In any linear projection, projections of unit vector that are very short compared to the others indicate that their associated attribute is not very informative for particular classification task. Those vectors, that is, their corresponding anchors, may be hidden from the visualization using Radius slider in Show anchors box. - -![](images/freeviz-zoo-stamped.png) - -1. Two initial positions of anchors are possible: random and circular. 
Optimization moves anchors in an optimal position. -2. Set the color of the displayed points (you will get colors for discrete values and grey-scale points for continuous). Set label, shape and size to differentiate between points. Set symbol size and opacity for all data points. -3. Anchors inside a circle are hidden. Circle radius can be be changed using a slider. -4. Adjust plot properties: - - Set [jittering](https://en.wikipedia.org/wiki/Jitter) to prevent the dots from overlapping (especially for discrete attributes). - - *Show legend* displays a legend on the right. Click and drag the legend to move it. - - *Show class density* colors the graph by class (see the screenshot below). - - *Label only selected points* allows you to select individual data instances and label them. -5. *Select, zoom, pan and zoom to fit* are the options for exploring the graph. The manual selection of data instances works as an angular/square selection tool. Double click to move the projection. Scroll in or out for zoom. -6. If *Send automatically* is ticked, changes are communicated automatically. Alternatively, press *Send*. -7. *Save Image* saves the created image to your computer in a .svg or .png format. -8. Produce a report. - -Manually move anchors ---------------------- - -![](images/freeviz-moveanchor.png) - -One can manually move anchors. Use a mouse pointer and hover above the end of an anchor. Click the left button and then you can move selected anchor where ever you want. - -Selection ---------- - -Selection can be used to manually defined subgroups in the data. Use Shift modifier when selecting data instances to put them into a new group. Shift + Ctrl (or Shift + Cmd on macOs) appends instances to the last group. - -Signal data outputs a data table with an additional column that contains group indices. 
- -![](images/FreeViz-selection.png) - -Explorative Data Analysis -------------------------- - -The **FreeViz**, as the rest of Orange widgets, supports zooming-in and out of part of the plot and a manual selection of data instances. These functions are available in the lower left corner of the widget. The default tool is *Select*, which selects data instances within the chosen rectangular area. *Pan* enables you to move the plot around the pane. With *Zoom* you can zoom in and out of the pane with a mouse scroll, while *Reset zoom* resets the visualization to its optimal size. An example of a simple schema, where we selected data instances from a rectangular region and sent them to the [Data Table](../data/datatable.md) widget, is shown below. - -![](images/FreeViz-Example-Explorative.png) diff --git a/doc/visual-programming/source/widgets/visualize/heatmap.md b/doc/visual-programming/source/widgets/visualize/heatmap.md deleted file mode 100644 index a593b55773d..00000000000 --- a/doc/visual-programming/source/widgets/visualize/heatmap.md +++ /dev/null @@ -1,72 +0,0 @@ -Heat Map -======== - -Plots a heat map for a pair of attributes. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -[Heat map](https://en.wikipedia.org/wiki/Heat_map) is a graphical method for visualizing attribute values in a two-way matrix. It only works on datasets containing numeric variables. The values are represented by color according to the selected color pallette. By combining class variable and attributes on x and y axes, we see where the attribute values are the strongest and where the weakest, thus enabling us to find typical features for each class. - -The widget enables row selection with click and drag. One can zoom in with Ctrl++ (Cmd++) and zoom out with Ctrl+- (Cmd+-). 
Ctrl+0 (Cmd+0) resets zoom to the extended version, while Ctrl+9 (Cmd+9) reset it to the default. - -![](images/HeatMap.png) - -1. The color pallette. Choose from linear, diverging, color-blind friendly, or other pallettes. **Low** and **High** are thresholds for the color palette (low for attributes with low values and high for attributes with high values). Selecting one of diverging palettes, which have two extreme colors and a neutral (black or white) color at the midpoint, enables an option to set a meaningful mid-point value (default is 0). -2. Merge rows. If there are too many rows in the visualization, one can merge them with k-means algorithm into N selected clusters (default 50). -3. Cluster columns and rows: - - **None** (lists attributes and rows as found in the dataset) - - **Clustering** (clusters data by similarity with hierarchical clustering on Euclidean distances and with average linkage) - - **Clustering with ordered leaves** (same as clustering, but it additionally maximizes the sum of similarities of adjacent elements) -4. Split rows or columns by a categorical variable. If the data contains a class variable, rows will be automatically split by class. -5. Set what is displayed in the plot in **Annotation & Legend**. - - If *Show legend* is ticked, a color chart will be displayed above the map. - - If *Stripes with averages* is ticked, a new line with attribute averages will be displayed on the left. - **Row Annotations** adds annotations to each instance on the right. Color colors the instances with the corresponding value of the selected categorical variable. - **Column Annotations** adds annotation to each variable at the selected position (default is Top). Color colors the columns with the corresponding value of the selected column annotation. -6. If *Keep aspect ratio* is ticked, each value will be displayed with a square (proportionate to the map). -7. If *Send Automatically* is ticked, changes are communicated automatically. 
Alternatively, click *Send*. - -### Advanced visualization - -Heat map enables some neat plot enhancements. Such options are clustering of rows and/or columns for better data organization, row and column annotations, and splitting the data by categorical variables. - -Row and column clustering is performed independently. Row clustering is computed from Euclidean distances, while column clustering uses Pearson correlation coefficients. Hierarchical clustering is based on the Ward linkage method. Clustering with optimal leaf ordering reorders left and right branches in the dendrogram to minimize the sum of distances between adjacent leaves (Bar-Joseph et al. 2001). - - - -![](images/HeatMap-advanced.png) - -Examples --------- - -### Gene expresssions - -The **Heat Map** below displays attribute values for the *brown-selected* data set (Brown et al. 2000). Heat maps are particularly appropriate for showing gene expressions and the brown-selected data set contains yeast gene expressions at different conditions. - -Heat map shows low expressions in blue and high expressions in yellow and white. For better organization, we added *Clustering (opt. ordering)* to the columns, which puts columns with similar profiles closer together. In this way we can see the conditions that result in low expressions for ribosomal genes in the lower right corner. - -Additionally, the plot is enhanced with row color on the right, showing which class the rows belong to. - -![](images/HeatMap-Example1.png) - -### Sentiment Analysis - -Heat maps are great for visualizing any kind of comparable numeric variables, for example sentiment in a collection of documents. We will take *book-excerpts* corpus from the **Corpus** widget and pass it to the **Sentiment Analysis** widget, which computes sentiment scores for each document. 
The output of sentiment analysis are four columns, positive, negative, and neutral sentiment score, and a compound score that aggregates the previous scores into a single number. Positive compound values (white) represent positive documents, while negative (blue) represent negative documents. - -We used row clustering to place similar rows closer together, resulting in clear negative and positive groups. Now we can select negative children's books and explore which are they. - -![](images/HeatMap-Example2.png) - -References ----------- - -Bar-Joseph, Z., Gifford, D.K., Jaakkola, T.S. (2001) Fast optimal leaf ordering for hierarchical clustering, Bioinformatics, 17, 22-29. - -Brown, M.P., Grundy, W.N., Lin, D., Cristianini, N., Sugnet, C., Furey, T.S., Ares, M., Haussler, D. (2000) Knowledge-based analysis of microarray gene expression data by using support vector machines, Proceedings of the National Academy of Sciences, 1, 262-267. diff --git a/doc/visual-programming/source/widgets/visualize/icons/box-plot.png b/doc/visual-programming/source/widgets/visualize/icons/box-plot.png deleted file mode 100644 index 23699ab6cfe..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/box-plot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/cn2ruleviewer.png b/doc/visual-programming/source/widgets/visualize/icons/cn2ruleviewer.png deleted file mode 100644 index 902f3ff30eb..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/cn2ruleviewer.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/distributions.png b/doc/visual-programming/source/widgets/visualize/icons/distributions.png deleted file mode 100644 index 936afcf6105..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/distributions.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/freeviz.png 
b/doc/visual-programming/source/widgets/visualize/icons/freeviz.png deleted file mode 100644 index aa240648319..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/freeviz.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/heat-map.png b/doc/visual-programming/source/widgets/visualize/icons/heat-map.png deleted file mode 100644 index 77e88e759f4..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/heat-map.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/line-plot.png b/doc/visual-programming/source/widgets/visualize/icons/line-plot.png deleted file mode 100644 index 0e88721944e..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/line-plot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/linear-projection.png b/doc/visual-programming/source/widgets/visualize/icons/linear-projection.png deleted file mode 100644 index e6a6db9ad9b..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/linear-projection.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/mosaic-display.png b/doc/visual-programming/source/widgets/visualize/icons/mosaic-display.png deleted file mode 100644 index ae830c13a60..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/mosaic-display.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/nomogram.png b/doc/visual-programming/source/widgets/visualize/icons/nomogram.png deleted file mode 100644 index 4025425b733..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/nomogram.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/pythagorean-forest.png b/doc/visual-programming/source/widgets/visualize/icons/pythagorean-forest.png deleted file 
mode 100644 index cd4c275f5d1..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/pythagorean-forest.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/pythagorean-tree.png b/doc/visual-programming/source/widgets/visualize/icons/pythagorean-tree.png deleted file mode 100644 index 899933f0333..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/pythagorean-tree.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/radviz.png b/doc/visual-programming/source/widgets/visualize/icons/radviz.png deleted file mode 100644 index f517871755c..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/radviz.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/scatter-map.png b/doc/visual-programming/source/widgets/visualize/icons/scatter-map.png deleted file mode 100644 index f8a39a9260d..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/scatter-map.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/scatter-plot.png b/doc/visual-programming/source/widgets/visualize/icons/scatter-plot.png deleted file mode 100644 index 1f7b859f595..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/scatter-plot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/sieve-diagram.png b/doc/visual-programming/source/widgets/visualize/icons/sieve-diagram.png deleted file mode 100644 index 7029ce19e3b..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/sieve-diagram.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/silhouette-plot.png b/doc/visual-programming/source/widgets/visualize/icons/silhouette-plot.png deleted file mode 100644 index 7604d9b357e..00000000000 Binary files 
a/doc/visual-programming/source/widgets/visualize/icons/silhouette-plot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/tree-viewer.png b/doc/visual-programming/source/widgets/visualize/icons/tree-viewer.png deleted file mode 100644 index a3ef4294ccd..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/tree-viewer.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/icons/venn-diagram.png b/doc/visual-programming/source/widgets/visualize/icons/venn-diagram.png deleted file mode 100644 index 2110f002909..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/icons/venn-diagram.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Bar-Plot-Example.png b/doc/visual-programming/source/widgets/visualize/images/Bar-Plot-Example.png deleted file mode 100644 index d0a1a50c11b..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Bar-Plot-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Bar-Plot-stamped.png b/doc/visual-programming/source/widgets/visualize/images/Bar-Plot-stamped.png deleted file mode 100644 index 4599ad116b4..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Bar-Plot-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Continuous-small.png b/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Continuous-small.png deleted file mode 100644 index bbfc65f486f..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Continuous-small.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Continuous.png b/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Continuous.png deleted file mode 100644 index c0eda1e758e..00000000000 Binary 
files a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Continuous.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Discrete.png b/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Discrete.png deleted file mode 100644 index 9b0c70dc3b0..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Discrete.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Example1.png b/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Example1.png deleted file mode 100644 index 12a869cf2b2..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Example2.png b/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Example2.png deleted file mode 100644 index 3cbc48ed306..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/BoxPlot-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/CN2-Viewer-Example1.png b/doc/visual-programming/source/widgets/visualize/images/CN2-Viewer-Example1.png deleted file mode 100644 index 301a6296ddb..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/CN2-Viewer-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer-stamped.png b/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer-stamped.png deleted file mode 100644 index f2519790b38..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer-tags.txt b/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer-tags.txt deleted file mode 
100644 index 51a48af0db1..00000000000 --- a/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer-tags.txt +++ /dev/null @@ -1,3 +0,0 @@ -0 171 400 -1 284 400 -2 742 400 diff --git a/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer.png b/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer.png deleted file mode 100644 index 9090c074fa5..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/CN2RuleViewer.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Distributions-Continuous.png b/doc/visual-programming/source/widgets/visualize/images/Distributions-Continuous.png deleted file mode 100644 index b9f92df585e..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Distributions-Continuous.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Distributions-Discrete.png b/doc/visual-programming/source/widgets/visualize/images/Distributions-Discrete.png deleted file mode 100644 index eb87c1baac4..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Distributions-Discrete.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Distributions-NoClass.png b/doc/visual-programming/source/widgets/visualize/images/Distributions-NoClass.png deleted file mode 100644 index cb6a569c9f3..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Distributions-NoClass.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/FreeViz-Example-Explorative.png b/doc/visual-programming/source/widgets/visualize/images/FreeViz-Example-Explorative.png deleted file mode 100644 index bea4bf56d61..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/FreeViz-Example-Explorative.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/visualize/images/FreeViz-selection.png b/doc/visual-programming/source/widgets/visualize/images/FreeViz-selection.png deleted file mode 100644 index 62adc04c854..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/FreeViz-selection.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/HeatMap-Example1.png b/doc/visual-programming/source/widgets/visualize/images/HeatMap-Example1.png deleted file mode 100644 index a93ae5df9ef..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/HeatMap-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/HeatMap-Example2.png b/doc/visual-programming/source/widgets/visualize/images/HeatMap-Example2.png deleted file mode 100644 index 149463a8f51..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/HeatMap-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/HeatMap-advanced.png b/doc/visual-programming/source/widgets/visualize/images/HeatMap-advanced.png deleted file mode 100644 index b47f79dcea0..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/HeatMap-advanced.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/HeatMap.png b/doc/visual-programming/source/widgets/visualize/images/HeatMap.png deleted file mode 100644 index 0c04b6c2ebd..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/HeatMap.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/LinePlot-Example.png b/doc/visual-programming/source/widgets/visualize/images/LinePlot-Example.png deleted file mode 100644 index d0a2f0e9f71..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/LinePlot-Example.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/visualize/images/LinePlot-stamped.png b/doc/visual-programming/source/widgets/visualize/images/LinePlot-stamped.png deleted file mode 100644 index d0f99d20646..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/LinePlot-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/LinearProjection-Example.png b/doc/visual-programming/source/widgets/visualize/images/LinearProjection-Example.png deleted file mode 100644 index de43ff95b00..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/LinearProjection-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/LinearProjection-stamped.png b/doc/visual-programming/source/widgets/visualize/images/LinearProjection-stamped.png deleted file mode 100644 index eccaa5fc5e5..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/LinearProjection-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display-Example.png b/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display-Example.png deleted file mode 100644 index 5933f0be9a6..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display-stamped.png b/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display-stamped.png deleted file mode 100644 index f54f61346fe..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display.png b/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display.png deleted file mode 100644 index 81ee93cbe98..00000000000 Binary files 
a/doc/visual-programming/source/widgets/visualize/images/Mosaic-Display.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Nomogram-Example.png b/doc/visual-programming/source/widgets/visualize/images/Nomogram-Example.png deleted file mode 100644 index 2eec917d537..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Nomogram-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Nomogram-Features.png b/doc/visual-programming/source/widgets/visualize/images/Nomogram-Features.png deleted file mode 100644 index c53699b8833..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Nomogram-Features.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Nomogram-LogisticRegression.png b/doc/visual-programming/source/widgets/visualize/images/Nomogram-LogisticRegression.png deleted file mode 100644 index f33481d17fe..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Nomogram-LogisticRegression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Nomogram-NaiveBayes.png b/doc/visual-programming/source/widgets/visualize/images/Nomogram-NaiveBayes.png deleted file mode 100644 index ea9788b81ea..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Nomogram-NaiveBayes.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-Example.png b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-Example.png deleted file mode 100644 index afec87bcec9..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-stamped.png 
b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-stamped.png deleted file mode 100644 index 6f0658fa4bf..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-tags.txt b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-tags.txt deleted file mode 100644 index 1ebb2bf4cc2..00000000000 --- a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Forest-tags.txt +++ /dev/null @@ -1,3 +0,0 @@ -0 171 35 -1 171 89 -2 171 551 diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-comparison.png b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-comparison.png deleted file mode 100644 index 5618fb15b85..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-comparison.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-scatterplot-workflow.png b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-scatterplot-workflow.png deleted file mode 100644 index fcf7c66bb06..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-scatterplot-workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-scatterplot.png b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-scatterplot.png deleted file mode 100644 index 12e9fbe34bc..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree-scatterplot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-continuous.png b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-continuous.png deleted file mode 
100644 index af5722f73a4..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-continuous.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-stamped.png b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-stamped.png deleted file mode 100644 index 94729b7979e..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-tags.txt b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-tags.txt deleted file mode 100644 index 979b78edc7b..00000000000 --- a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1-tags.txt +++ /dev/null @@ -1,4 +0,0 @@ -0 171 35 -1 171 100 -2 171 241 -3 171 511 diff --git a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1.png b/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1.png deleted file mode 100644 index c969c83fb21..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Pythagorean-Tree1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Radviz-Brown-2.png b/doc/visual-programming/source/widgets/visualize/images/Radviz-Brown-2.png deleted file mode 100644 index 80f937b6f10..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Radviz-Brown-2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Radviz-Brown.png b/doc/visual-programming/source/widgets/visualize/images/Radviz-Brown.png deleted file mode 100644 index 35e6021940a..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Radviz-Brown.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/visualize/images/ScatterMap-Example.png b/doc/visual-programming/source/widgets/visualize/images/ScatterMap-Example.png deleted file mode 100644 index 008b12324a5..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScatterMap-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScatterMap2-stamped.png b/doc/visual-programming/source/widgets/visualize/images/ScatterMap2-stamped.png deleted file mode 100644 index 5a0d6816658..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScatterMap2-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScatterMap2.png b/doc/visual-programming/source/widgets/visualize/images/ScatterMap2.png deleted file mode 100644 index 60a3d2eeb4b..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScatterMap2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScatterPlot-selection.png b/doc/visual-programming/source/widgets/visualize/images/ScatterPlot-selection.png deleted file mode 100644 index a425b49d1b8..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScatterPlot-selection.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Classification.png b/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Classification.png deleted file mode 100644 index df25aae4200..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Explorative.png b/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Explorative.png deleted file mode 100644 index cd0bfa4c104..00000000000 Binary files 
a/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Explorative.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Ranking.png b/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Ranking.png deleted file mode 100644 index 71abf2be38d..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScatterPlotExample-Ranking.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Scatterplot-ClassDensity.png b/doc/visual-programming/source/widgets/visualize/images/Scatterplot-ClassDensity.png deleted file mode 100644 index ebc0f8386d9..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Scatterplot-ClassDensity.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/Scatterplot-Iris-stamped.png b/doc/visual-programming/source/widgets/visualize/images/Scatterplot-Iris-stamped.png deleted file mode 100644 index 72dcb0f86fb..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/Scatterplot-Iris-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScoringSheetViewer-widget.png b/doc/visual-programming/source/widgets/visualize/images/ScoringSheetViewer-widget.png deleted file mode 100644 index 3d67202186b..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScoringSheetViewer-widget.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ScoringSheetViewer-workflow.png b/doc/visual-programming/source/widgets/visualize/images/ScoringSheetViewer-workflow.png deleted file mode 100644 index 1384ff7aa40..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ScoringSheetViewer-workflow.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example.png b/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example.png deleted file mode 100644 index 75e3b4ab549..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example1.PNG b/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example1.PNG deleted file mode 100644 index a3367df3cb0..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example1.PNG and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example2.PNG b/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example2.PNG deleted file mode 100644 index c29596ea17a..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Example2.PNG and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Titanic-age-survived.png b/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Titanic-age-survived.png deleted file mode 100644 index 7c0f459fb61..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Titanic-age-survived.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Titanic.png b/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Titanic.png deleted file mode 100644 index 842b68d93b3..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-Titanic.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-stamped.png b/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-stamped.png deleted file mode 100644 index df4fa036879..00000000000 
Binary files a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram.png b/doc/visual-programming/source/widgets/visualize/images/SieveDiagram.png deleted file mode 100644 index 5877a05b4a6..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SieveDiagram.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SilhouettePlot-Example.png b/doc/visual-programming/source/widgets/visualize/images/SilhouettePlot-Example.png deleted file mode 100644 index dade8110ac3..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SilhouettePlot-Example.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/SilhouettePlot.png b/doc/visual-programming/source/widgets/visualize/images/SilhouettePlot.png deleted file mode 100644 index c74c198d4a9..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/SilhouettePlot.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-classification.png b/doc/visual-programming/source/widgets/visualize/images/TreeViewer-classification.png deleted file mode 100644 index 49b4a32cf6a..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-classification.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-regression.png b/doc/visual-programming/source/widgets/visualize/images/TreeViewer-regression.png deleted file mode 100644 index 7af73f4bb9a..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-regression.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-selection.png 
b/doc/visual-programming/source/widgets/visualize/images/TreeViewer-selection.png deleted file mode 100644 index 4f66ccc9e9a..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-selection.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-stamped.png b/doc/visual-programming/source/widgets/visualize/images/TreeViewer-stamped.png deleted file mode 100644 index a35eecdcffb..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/TreeViewer-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/VennDiagram-Example1.png b/doc/visual-programming/source/widgets/visualize/images/VennDiagram-Example1.png deleted file mode 100644 index 5e5e6cfd9d5..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/VennDiagram-Example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/VennDiagram-Example2.png b/doc/visual-programming/source/widgets/visualize/images/VennDiagram-Example2.png deleted file mode 100644 index c2629cd2316..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/VennDiagram-Example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/VennDiagram-stamped.png b/doc/visual-programming/source/widgets/visualize/images/VennDiagram-stamped.png deleted file mode 100644 index 65b1d55a052..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/VennDiagram-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-boxplot.png b/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-boxplot.png deleted file mode 100644 index 0f2dbb53adb..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-boxplot.png and /dev/null differ diff --git 
a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-example1.png b/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-example1.png deleted file mode 100644 index 744a2100fef..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-example1.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-example2.png b/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-example2.png deleted file mode 100644 index 38767258391..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-example2.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-stamped.png b/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-stamped.png deleted file mode 100644 index 6ec88c6b6af..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/ViolinPlot-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/freeviz-moveanchor.png b/doc/visual-programming/source/widgets/visualize/images/freeviz-moveanchor.png deleted file mode 100644 index 8d9f3a95644..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/freeviz-moveanchor.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/freeviz-zoo-stamped.png b/doc/visual-programming/source/widgets/visualize/images/freeviz-zoo-stamped.png deleted file mode 100644 index 6e30e022833..00000000000 Binary files a/doc/visual-programming/source/widgets/visualize/images/freeviz-zoo-stamped.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/images/venn-workflow.png b/doc/visual-programming/source/widgets/visualize/images/venn-workflow.png deleted file mode 100644 index e407d7b14b6..00000000000 Binary files 
a/doc/visual-programming/source/widgets/visualize/images/venn-workflow.png and /dev/null differ diff --git a/doc/visual-programming/source/widgets/visualize/linearprojection.md b/doc/visual-programming/source/widgets/visualize/linearprojection.md deleted file mode 100644 index 54be08a316b..00000000000 --- a/doc/visual-programming/source/widgets/visualize/linearprojection.md +++ /dev/null @@ -1,56 +0,0 @@ -Linear Projection -================= - -A linear projection method with explorative data analysis. - -**Inputs** - -- Data: input dataset -- Data Subset: subset of instances -- Projection: custom projection vectors - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected -- Components: projection vectors - -This widget displays [linear projections](https://en.wikipedia.org/wiki/Projection_(linear_algebra)) of class-labeled data. It supports various types of projections such as circular, [linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis), and [principal component analysis](https://en.wikipedia.org/wiki/Principal_component_analysis). - -Consider, for a start, a projection of the *Iris* dataset shown below. Notice that it is the sepal width and sepal length that already separate *Iris setosa* from the other two, while the petal length is the attribute best separating *Iris versicolor* from *Iris virginica*. - -![](images/LinearProjection-stamped.png) - -1. Axes in the projection that are displayed and other available axes. Optimize your projection by using **Suggest Features**. This feature scores attributes and returns the top scoring attributes with a simultaneous visualization update. Feature scoring computes the classification accuracy (for classification) or MSE (regression) of k-nearest neighbors classifier on the projected, two-dimensional data. The score reflects how well the classes in the projection are separated. -2. 
Choose the type of projection: - - Circular Placement - - [Linear Discriminant Analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis); available only for categorical target variable with at least three distinct values in the data. - - [Principal Component Analysis](https://en.wikipedia.org/wiki/Principal_component_analysis) -3. Set the color of the displayed points. Set shape, size, and label to differentiate between points. - *Label only selected points* labels only selected data instances. -4. Adjust plot properties: - - *Symbol size*: set the size of the points. - - *Opacity*: set the transparency of the points. - - *Jittering*: Randomly disperse points with [jittering](https://en.wikipedia.org/wiki/Jitter) to prevent them from overlapping. - - *Hide radius*: Axes inside the radius are hidden. Drag the slider to change the radius. -5. Additional plot properties: - - *Show color regions* colors the graph by class. - - *Show legend* displays a legend on the right. Click and drag the legend to move it. -6. *Select, zoom, pan* and *zoom to fit* are the options for exploring the graph. Manual selection of data instances works as an angular/square selection tool. Double click to move the projection. Scroll in or out for zoom. -7. If *Send automatically* is ticked, changes are communicated automatically. Alternatively, press *Send*. - -Example -------- - -The **Linear Projection** widget works just like other visualization widgets. Below, we connected it to the [File](../data/file.md) widget to see the set projected on a 2-D plane. Then we selected the data for further analysis and connected it to the [Data Table](../data/datatable.md) widget to see the details of the selected subset. - -![](images/LinearProjection-Example.png) - -References ----------- - -Koren Y., Carmel L. (2003). Visualization of labeled data using linear transformations. In Proceedings of IEEE Information Visualization 2003, (InfoVis'03). 
Available [here](http://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=3DDF0DB68D8AB9949820A19B0344C1F3?doi=10.1.1.13.8657&rep=rep1&type=pdf). - -Boulesteix A.-L., Strimmer K. (2006). Partial least squares: a versatile tool for the analysis of high-dimensional genomic data. Briefings in Bioinformatics, 8(1), 32-44. Abstract [here](http://bib.oxfordjournals.org/content/8/1/32.abstract). - -Leban G., Zupan B., Vidmar G., Bratko I. (2006). VizRank: Data Visualization Guided by Machine Learning. Data Mining and Knowledge Discovery, 13, 119-136. Available [here](http://eprints.fri.uni-lj.si/210/2/1._G._Leban%2C_B._Zupan%2C_G._Vidmar%2C_I._Bratko%2C_Data_Mining_and_Knowledge_Discovery_13%2C_119-36_(2006)..pdf). diff --git a/doc/visual-programming/source/widgets/visualize/lineplot.md b/doc/visual-programming/source/widgets/visualize/lineplot.md deleted file mode 100644 index aa9a83bb720..00000000000 --- a/doc/visual-programming/source/widgets/visualize/lineplot.md +++ /dev/null @@ -1,37 +0,0 @@ -Line Plot -========= - -Visualization of data profiles (e.g., time series). - -**Inputs** - -- Data: input dataset -- Data Subset: subset of instances - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -[Line plot](https://en.wikipedia.org/wiki/Line_chart) is a type of plot which displays the data as a series of points, connected by straight line segments. It only works for numerical data, while categorical can be used for grouping of the data points. - -![](images/LinePlot-stamped.png) - -1. Information on the input data. -2. Select what you wish to display: - - Lines show individual data instances in a plot. - - Range shows the range of data points between 10th and 90th percentile. - - Mean adds the line for mean value. If group by is selected, means will be displayed per each group value. - - Error bars show the standard deviation of each attribute. -3.
Select a categorical attribute to use for grouping of data instances. Use None to show ungrouped data. -4. *Select, zoom, pan and zoom to fit* are the options for exploring the graph. The manual selection of data instances works as a line selection, meaning the data under the selected line plots will be sent on the output. Scroll in or out for zoom. When hovering over an individual axis, scrolling will zoom only by the hovered-on axis (vertical or horizontal zoom). -5. If *Send Automatically* is ticked, changes are communicated automatically. Alternatively, click *Send*. - -Example -------- - -**Line Plot** is a standard visualization widget, which displays data profiles, normally of ordered numerical data. In this simple example, we will display the *iris* data in a line plot, grouped by the iris attribute. The plot shows how petal length nicely separates between class values. - -If we observe this in a [Scatter Plot](../visualize/scatterplot.md), we can confirm this is indeed so. Petal length is an interesting attribute for separation of classes, especially when enhanced with petal width, which is also nicely separated in the line plot. - -![](images/LinePlot-Example.png) diff --git a/doc/visual-programming/source/widgets/visualize/mosaicdisplay.md b/doc/visual-programming/source/widgets/visualize/mosaicdisplay.md deleted file mode 100644 index 6fed3b73d23..00000000000 --- a/doc/visual-programming/source/widgets/visualize/mosaicdisplay.md +++ /dev/null @@ -1,31 +0,0 @@ -Mosaic Display -============== - -Display data in a mosaic plot. - -**Inputs** - -- Data: input dataset -- Data subset: subset of instances - -**Outputs** - -- Selected data: instances selected from the plot - -The **Mosaic plot** is a graphical representation of a two-way frequency table or a contingency table. It is used for visualizing data from two or more qualitative variables and was introduced in 1981 by Hartigan and Kleiner and expanded and refined by Friendly in 1994. 
It provides the user with the means to more efficiently recognize relationships between different variables. If you wish to read up on the history of Mosaic Display, additional reading is available [here](http://www.datavis.ca/papers/moshist.pdf). - -![](images/Mosaic-Display-stamped.png) - -1. Select the variables you wish to see plotted. -2. Select interior coloring. You can color the interior according to class or you can use the *Pearson residual*, which is the difference between observed and fitted values, divided by an estimate of the standard deviation of the observed value. If *Compare to total* is clicked, a comparison is made to all instances. -3. *Save image* saves the created image to your computer in a .svg or .png format. -4. Produce a report. - -Example -------- - -We loaded the *titanic* dataset and connected it to the **Mosaic Display** widget. We decided to focus on three variables, namely status, sex and survival. We colored the interiors according to Pearson residuals in order to demonstrate the difference between observed and fitted values. - -![](images/Mosaic-Display-Example.png) - -We can see that the survival rates for men and women clearly deviate from the fitted value. diff --git a/doc/visual-programming/source/widgets/visualize/nomogram.md b/doc/visual-programming/source/widgets/visualize/nomogram.md deleted file mode 100644 index 172e7c1653c..00000000000 --- a/doc/visual-programming/source/widgets/visualize/nomogram.md +++ /dev/null @@ -1,48 +0,0 @@ -Nomogram -======== - -Nomograms for visualization of Naive Bayes and Logistic Regression classifiers. - -**Inputs** - -- Classifier: trained classifier -- Data: input dataset - -**Outputs** - -- Features: selected variables, 10 by default - -The **Nomogram** enables some classifier's (more precisely Naive Bayes classifier and Logistic Regression classifier) visual representation.
It offers an insight into the structure of the training data and effects of the attributes on the class probabilities. Besides visualization of the classifier, the widget offers interactive support for prediction of class probabilities. A snapshot below shows the nomogram of the Titanic dataset, that models the probability for a passenger not to survive the disaster of the Titanic. - -When there are too many attributes in the plotted dataset, only best ranked ones can be selected for display. It is possible to choose from 'No sorting', 'Name', 'Absolute importance', 'Positive influence' and 'Negative influence' for Naive Bayes representation and from 'No sorting', 'Name' and 'Absolute importance' for Logistic Regression representation. - -The probability for the chosen target class is computed by '1-vs-all' principle, which should be taken into consideration when dealing with multiclass data (alternating probabilities do not sum to 1). To avoid this inconvenience, you can choose to normalize probabilities. - -![](images/Nomogram-NaiveBayes.png) - -1. Select the target class you want to model the probability for. Select, whether you want to normalize the probabilities or not. -2. By default Scale is set to Log odds ratio. For easier understanding and interpretation option *Point scale* can be used. The unit is obtained by re-scaling the log odds so that the maximal absolute log odds ratio in the nomogram represents 100 points. -3. Display all attributes or only the best ranked ones. Sort them and set the projection type. - -Continuous attributes can be plotted in 2D (only for Logistic Regression). - -![logreg](images/Nomogram-LogisticRegression.png) - -Examples --------- - -The **Nomogram** widget should be used immediately after trained classifier widget (e.g. [Naive Bayes](../model/naivebayes.md) or [Logistic Regression](../model/logisticregression.md)). It can also be passed a data instance using any widget that enables selection (e.g.
[Data Table](../data/datatable.md)) as shown in the workflow below. - -![](images/Nomogram-Example.png) - -Referring to the Titanic dataset once again, 1490 (68%) passengers on Titanic out of 2201 died. To make a prediction, the contribution of each attribute is measured as a point score and the individual point scores are summed to determine the probability. When the value of the attribute is unknown, its contribution is 0 points. Therefore, not knowing anything about the passenger, the total point score is 0 and the corresponding probability equals the unconditional prior. The nomogram in the example shows the case when we know that the passenger is a male adult from the first class. The points sum to -0.36, with a corresponding probability of not surviving of about 53%. - -#### Features output - -The second example shows how to use the Features output. Let us use *heart_disease* data for this exercise and load it in the File widget. Now connect File to [Naive Bayes](../model/naivebayes.md) (or [Logistic Regression](../model/logisticregression.md)) and add Nomogram to Naive Bayes. Finally, connect File to [Select Columns](../data/selectcolumns.md). - -Select Columns selects a subset of variables, while Nomogram shows the top scoring variables for the trained classifier. To filter the data by the variables selected in the Nomogram, connect Nomogram to Select Columns as shown below. Nomogram will pass a list of selected variables to Select Columns, which will retain only the variables from the list. For this to work, you have to press *Use input features* in Select Columns (or tick it to always apply it). - -We have selected the top 5 variables in Nomogram and used Select Columns to retain only those variables. 
- -![](images/Nomogram-Features.png) diff --git a/doc/visual-programming/source/widgets/visualize/pythagoreanforest.md b/doc/visual-programming/source/widgets/visualize/pythagoreanforest.md deleted file mode 100644 index ce287583625..00000000000 --- a/doc/visual-programming/source/widgets/visualize/pythagoreanforest.md +++ /dev/null @@ -1,40 +0,0 @@ -Pythagorean Forest -================== - -Pythagorean forest for visualizing random forests. - -**Inputs** - -- Random Forest: tree models from random forest - -**Outputs** - -- Tree: selected tree model - -**Pythagorean Forest** shows all learned decision tree models from [Random Forest](../model/randomforest.md) widget. It displays them as Pythagorean trees, each visualization pertaining to one randomly constructed tree. In the visualization, you can select a tree and display it in [Pythagorean Tree](../visualize/pythagoreantree.md) widget. The best tree is the one with the shortest and most strongly colored branches. This means few attributes split the branches well. - -Widget displays both classification and regression results. Classification requires discrete target variable in the dataset, while regression requires a continuous target variable. Still, they both should be fed a [Tree](../model/tree.md) on the input. - -![](images/Pythagorean-Forest-stamped.png) - -1. Information on the input random forest model. -2. Display parameters: - - *Depth*: set the depth to which the trees are grown. - - *Target class*: set the target class for coloring the trees. If *None* is selected, the tree will be white. If the input is a classification tree, you can color the nodes by their respective class. If the input is a regression tree, the options are *Class mean*, which will color tree nodes by the class mean value and *Standard deviation*, which will color them by the standard deviation value of the node. - - *Size*: set the size of the nodes. *Normal* will keep the nodes the size of the subset in the node. 
*Square root* and *Logarithmic* are the respective transformations of the node size. - - *Zoom*: allows you to see the size of the tree visualizations. -3. *Save Image*: save the visualization to your computer as a *.svg* or *.png* file. *Report*: produce a report. - -Example -------- - -**Pythagorean Forest** is great for visualizing several built trees at once. In the example below, we've used *housing* dataset and plotted all 10 trees we've grown with [Random Forest](../model/randomforest.md). When changing the parameters in Random Forest, visualization in Pythagorean Forest will change as well. - -Then we've selected a tree in the visualization and inspected it further with [Pythagorean Tree](../visualize/pythagoreantree.md) widget. - -![](images/Pythagorean-Forest-Example.png) - -References ----------- - -Beck, F., Burch, M., Munz, T., Di Silvestro, L. and Weiskopf, D. (2014). Generalized Pythagoras Trees for Visualizing Hierarchies. In IVAPP '14 Proceedings of the 5th International Conference on Information Visualization Theory and Applications, 17-28. diff --git a/doc/visual-programming/source/widgets/visualize/pythagoreantree.md b/doc/visual-programming/source/widgets/visualize/pythagoreantree.md deleted file mode 100644 index 7d94336fa8b..00000000000 --- a/doc/visual-programming/source/widgets/visualize/pythagoreantree.md +++ /dev/null @@ -1,51 +0,0 @@ -Pythagorean Tree -================ - -Pythagorean tree visualization for classification or regression trees. - -**Inputs** - -- Tree: tree model -- Selected Data: instances selected from the tree - -**Pythagorean Trees** are plane fractals that can be used to depict general tree hierarchies as presented in an article by [Fabian Beck and co-authors](http://publications.fbeck.com/ivapp14-pythagoras.pdf). In our case, they are used for visualizing and exploring tree models, such as [Tree](../model/tree.md). - -![](images/Pythagorean-Tree1-stamped.png) - -1. Information on the input tree model. -2. 
Visualization parameters: - - *Depth*: set the depth of displayed trees. - - *Target class* (for classification trees): the intensity of the color for nodes of the tree will correspond to the probability of the target class. If *None* is selected, the color of the node will denote the most probable class. - - *Node color* (for regression trees): node colors can correspond to mean or standard deviation of class value of the training data instances in the node. - - *Size*: define a method to compute the size of the square representing the node. *Normal* will make node sizes correspond to the size of training data subset in the node. *Square root* and *Logarithmic* are the respective transformations of the node size. - - *Log scale factor* is only enabled when *logarithmic* transformation is selected. You can set the log factor between 1 and 10. -3. Plot properties: - - *Enable tooltips*: display node information upon hovering. - - *Show legend*: shows color legend for the plot. -4. Reporting: - - *Save Image*: save the visualization to an SVG or PNG file. - - *Report*: add visualization to the report. - -Pythagorean Tree can visualize both classification and regression trees. Below is an example for regression tree. The only difference between the two is that regression tree doesn't enable coloring by class, but can color by class mean or standard deviation. - -![](images/Pythagorean-Tree1-continuous.png) - -Example -------- - -The workflow from the screenshot below demonstrates the difference between [Tree Viewer](../visualize/treeviewer.md) and Pythagorean Tree. They can both visualize [Tree](../model/tree.md), but Pythagorean visualization takes less space and is more compact, even for a small [Iris flower](https://en.wikipedia.org/wiki/Iris_flower_data_set) dataset. For both visualization widgets, we have hidden the control area on the left by clicking on the splitter between control and visualization area.
- -![](images/Pythagorean-Tree-comparison.png) - -Pythagorean Tree is interactive: click on any of the nodes (squares) to select training data instances that were associated with that node. The following workflow explores this feature. - -![](images/Pythagorean-Tree-scatterplot-workflow.png) - -The selected data instances are shown as a subset in the [Scatter Plot](../visualize/scatterplot.md), sent to the [Data Table](../data/datatable.md) and examined in the [Box Plot](../visualize/boxplot.md). We have used brown-selected dataset in this example. The tree and scatter plot are shown below; the selected node in the tree has a black outline. - -![](images/Pythagorean-Tree-scatterplot.png) - -References ----------- - -Beck, F., Burch, M., Munz, T., Di Silvestro, L. and Weiskopf, D. (2014). [Generalized Pythagoras Trees for Visualizing Hierarchies](http://publications.fbeck.com/ivapp14-pythagoras.pdf). In IVAPP '14 Proceedings of the 5th International Conference on Information Visualization Theory and Applications, 17-28. diff --git a/doc/visual-programming/source/widgets/visualize/radviz.md b/doc/visual-programming/source/widgets/visualize/radviz.md deleted file mode 100644 index c0bc1b536ba..00000000000 --- a/doc/visual-programming/source/widgets/visualize/radviz.md +++ /dev/null @@ -1,38 +0,0 @@ -Radviz -====== - -Radviz visualization with explorative data analysis and intelligent data -visualization enhancements. - -**Inputs** - -- Data: input dataset -- Data Subset: subset of instances - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected -- Components: Radviz vectors - -Radviz (Hoffman et al. 1997) is a non-linear multi-dimensional visualization technique that can display data defined by three or more variables in a 2-dimensional projection. The visualized variables are presented as anchor points equally spaced around the perimeter of a unit circle.
Data instances are shown as points inside the circle, with their positions determined by a metaphor from physics: each point is held in place with springs that are attached at the other end to the variable anchors. The stiffness of each spring is proportional to the value of the corresponding variable and the point ends up at the position where the spring forces are in equilibrium. Prior to visualization, variable values are scaled to lie between 0 and 1. Data instances that are close to a set of variable anchors have higher values for these variables than for the others. - -The snapshot shown below shows a Radviz widget with a visualization of the dataset from functional genomics (Brown et al. 2000). In this particular visualization the data instances are colored according to the corresponding class, and the visualization space is colored according to the computed class probability. Notice that the particular visualization very nicely separates data instances of different classes, making the visualization interesting and potentially informative. - -![](images/Radviz-Brown.png) - -Just like all point-based visualizations, this widget includes tools for intelligent data visualization (VizRank, see Leban et al. 2006) and an interface for explorative data analysis - selection of data points in visualization. Just like the [Scatter Plot](../visualize/scatterplot.md) widget, it can be used to find a set of variables that would result in an interesting visualization. The Radviz graph above is according to this definition an example of a very good visualization, while the one below - where we show VizRank's interface (*Suggest features* button) -with a list of 3-attribute visualizations and their scores - is not. - -![](images/Radviz-Brown-2.png) - -References ----------- - -Hoffman, P. E. et al. (1997) DNA visual and analytic data mining. In the Proceedings of the IEEE Visualization. Phoenix, AZ, pp. 437-441. - -Brown, M. P., W. N. Grundy et al. (2000).
"Knowledge-based analysis of microarray gene expression data by using support vector machines." Proc Natl Acad Sci U S A 97(1): 262-7. - -Leban, G., B. Zupan et al. (2006). "VizRank: Data Visualization Guided by Machine Learning." Data Mining and Knowledge Discovery 13(2): 119-136. - -Mramor, M., G. Leban, J. Demsar, and B. Zupan. Visualization-based cancer microarray data classification analysis. Bioinformatics 23(16): 2147-2154, 2007. diff --git a/doc/visual-programming/source/widgets/visualize/scatterplot.md b/doc/visual-programming/source/widgets/visualize/scatterplot.md deleted file mode 100644 index a5e6b517dda..00000000000 --- a/doc/visual-programming/source/widgets/visualize/scatterplot.md +++ /dev/null @@ -1,80 +0,0 @@ -Scatter Plot -============ - -Scatter plot visualization with exploratory analysis and intelligent data visualization enhancements. - -**Inputs** - -- Data: input dataset -- Data Subset: subset of instances -- Features: list of attributes - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -The **Scatter Plot** widget provides a 2-dimensional scatter plot visualization. The data is displayed as a collection of points, each having the value of the x-axis attribute determining the position on the horizontal axis and the value of the y-axis attribute determining the position on the vertical axis. Various properties of the graph, like color, size and shape of the points, axis titles, maximum point size and jittering can be adjusted on the left side of the widget. A snapshot below shows the scatter plot of the *Iris* dataset with the coloring matching of the class attribute. - -![](images/Scatterplot-Iris-stamped.png) - -1. Select the x and y attribute. Optimize your projection with **Find Informative Projections**. 
This feature scores attribute pairs by average classification accuracy and returns the top scoring pair with a simultaneous visualization update. -2. *Attributes*: Set the color of the displayed points (you will get colors for categorical values and blue-green-yellow points for numeric). Set label, shape and size to differentiate between points. *Label only selected points* allows you to select individual data instances and label only those. -3. Set symbol size and opacity for all data points. Set [jittering](https://en.wikipedia.org/wiki/Jitter) to prevent the dots overlapping. Jittering will randomly scatter point only around categorical values. If *Jitter numeric values* is checked, points are also scattered around their actual numeric values. - - *Show color regions* colors the graph by class (see the screenshot below). - - *Show legend* displays a legend on the right. Click and drag the legend to move it. - - *Show gridlines* displays the grid behind the plot. - - *Show all data on mouse hover* enables information bubbles if the cursor is placed on a dot. - - *Show regression line* draws the regression line for pair of numeric attributes. If a categorical variable is selected for coloring the plot, individual regression lines for each class value will be displayed. The reported r value corresponds to the `rvalue` from [linear least-squares regression](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html), which is equal to the Pearson's correlation coefficient. - - *Treat variables as independent* fits regression line to a group of points (minimize distance from points), rather than fitting y as a function of x (minimize vertical distances). -4. *Select, zoom, pan and zoom to fit* are the options for exploring the graph. The manual selection of data instances works as an angular/square selection tool. Double click to move the projection. Scroll in or out for zoom. -5. 
If *Send automatically* is ticked, changes are communicated automatically. Alternatively, press *Send*. - -Here is an example of the **Scatter Plot** widget if the *Show color regions* and *Show regression line* boxes are ticked. - -![](images/Scatterplot-ClassDensity.png) - -Intelligent Data Visualization ------------------------------- - -If a dataset has many attributes, it is impossible to manually scan through all the pairs to find interesting or useful scatter plots. Orange implements intelligent data visualization with the **Find Informative Projections** option in the widget. - -If a categorical variable is selected in the Color section, the [score](http://eprints.fri.uni-lj.si/210/) is computed as follows. For each data instance, the method finds 10 nearest neighbors in the projected 2D space, that is, on the combination of attribute pairs. It then checks how many of them have the same color. The total score of the projection is then the average number of same-colored neighbors. - -Computation for numeric colors is similar, except that the [coefficient of determination](https://en.wikipedia.org/wiki/Coefficient_of_determination) is used for measuring the local homogeneity of the projection. - -To use this method, go to the *Find Informative Projections* option in the widget, open the subwindow and press *Start Evaluation*. The feature will return a list of attribute pairs by average classification accuracy score. - -Below, there is an example demonstrating the utility of ranking. The first scatter plot projection was set as the default sepal width to sepal length plot (we used the Iris dataset for simplicity). Upon running *Find Informative Projections* optimization, the scatter plot converted to a much better projection of petal width to petal length plot. - -![](images/ScatterPlotExample-Ranking.png) - -Selection ---------- - -Selection can be used to manually define subgroups in the data. 
Use Shift modifier when selecting data instances to put them into a new group. Shift + Ctrl (or Shift + Cmd on macOs) appends instances to the last group. - -Signal data outputs a data table with an additional column that contains group indices. - -![](images/ScatterPlot-selection.png) - -Exploratory Data Analysis -------------------------- - -The **Scatter Plot**, as the rest of Orange widgets, supports zooming-in and out of part of the plot and a manual selection of data instances. These functions are available in the lower left corner of the widget. - -The default tool is *Select*, which selects data instances within the chosen rectangular area. *Pan* enables you to move the scatter plot around the pane. With *Zoom* you can zoom in and out of the pane with a mouse scroll, while *Reset zoom* resets the visualization to its optimal size. An example of a simple schema, where we selected data instances from a rectangular region and sent them to the [Data Table](../data/datatable.md) widget, is shown below. Notice that the scatter plot doesn't show all 52 data instances, because some data instances overlap (they have the same values for both attributes used). - -![](images/ScatterPlotExample-Explorative.png) - -Example -------- - -The **Scatter Plot** can be combined with any widget that outputs a list of selected data instances. In the example below, we combine [Tree](../model/tree.md) and **Scatter Plot** to display instances taken from a chosen decision tree node (clicking on any node of the tree will send a set of selected data instances to the scatter plot and mark selected instances with filled symbols). - -![](images/ScatterPlotExample-Classification.png) - -References ----------- - -Gregor Leban and Blaz Zupan and Gaj Vidmar and Ivan Bratko (2006) VizRank: Data Visualization Guided by Machine Learning. Data Mining and Knowledge Discovery, 13 (2). pp. 119-136. Available [here](http://eprints.fri.uni-lj.si/210/). 
diff --git a/doc/visual-programming/source/widgets/visualize/scoringsheetviewer.md b/doc/visual-programming/source/widgets/visualize/scoringsheetviewer.md deleted file mode 100644 index c05d5c204e0..00000000000 --- a/doc/visual-programming/source/widgets/visualize/scoringsheetviewer.md +++ /dev/null @@ -1,28 +0,0 @@ -Scoring Sheet Viewer -================ -A widget for visualizing the scoring sheet predictions. - -**Inputs** - -- Classifier: a trained scoring sheet model -- Data: dataset used to visualize the predictions on different instances - -**Outputs** - -- Features: features used in the scoring sheet - -![](images/ScoringSheetViewer-widget.png) - -**Scoring Sheet Viewer** widget offers a simple and intuitive way of visualizing the predictions of the scoring sheet model. The widget takes as input a trained scoring sheet model and an optional dataset (instance) on which we want to visualize the predictions. The widget presents us with a table that visualizes each feature's contribution to the final score, where a higher score indicates a greater chance for an individual to be classified with the target class. Each feature's contribution can be positive or negative, indicating whether it increases or decreases the risk. - - -Example -------- - -![](images/ScoringSheetViewer-workflow.png) - -In this example, we first sample the data, with a portion used to train the Scoring Sheet model and a part routed to the Table widget. This setup allows us to select instances and observe how the scoring sheet performs with new, unseen data. - -Let's analyze and learn to interpret the scoring sheet using the example. It features five decision parameters, with points ranging from -5 to 5. We have set the target class to '1,' indicating the 'presence' of heart disease. Positive-value decision parameters increase the risk of heart disease, while those with negative values reduce it. - -Consider a selected instance from the Data Table widget. 
It has a 'slope peak exc ST' attribute value of 'upsloping', which reduces the heart disease risk by 3 points. However, it also has the 'chest pain' attribute set to 'asymptomatic', increasing the risk by 5 points. This combination results in a total score of 2, corresponding to a 71.6% probability of having heart disease. diff --git a/doc/visual-programming/source/widgets/visualize/sievediagram.md b/doc/visual-programming/source/widgets/visualize/sievediagram.md deleted file mode 100644 index cfb6829a1b4..00000000000 --- a/doc/visual-programming/source/widgets/visualize/sievediagram.md +++ /dev/null @@ -1,42 +0,0 @@ -Sieve Diagram -============= - -Plots a sieve diagram for a pair of attributes. - -**Inputs** - -- Data: input dataset - -A **Sieve Diagram** is a graphical method for visualizing frequencies in a two-way contingency table and comparing them to [expected frequencies](http://cnx.org/contents/d396c4ad-2fd7-47cd-be84-152b44880feb@2/What-is-an-expected-frequency) under assumption of independence. It was proposed by Riedwyl and Schüpbach in a technical report in 1983 and later called a parquet diagram (Riedwyl and Schüpbach 1994). In this display, the area of each rectangle is proportional to the expected frequency, while the observed frequency is shown by the number of squares in each rectangle. The difference between observed and expected frequency (proportional to the standard Pearson residual) appears as the density of shading, using color to indicate whether the deviation from independence is positive (blue) or negative (red). - -![](images/SieveDiagram-stamped.png) - -1. Select the attributes you want to display in the sieve plot. -2. Score combinations enables you to find the best possible combination of attributes. -3. *Save Image* saves the created image to your computer in a .svg or .png format. -4. Produce a report. 
- -The snapshot below shows a sieve diagram for the *Titanic* dataset and has the attributes *sex* and *survived* (the latter is a class attribute in this dataset). The plot shows that the two variables are highly associated, as there are substantial differences between observed and expected frequencies in all of the four quadrants. For example, and as highlighted in the balloon, the chance for surviving the accident was much higher for female passengers than expected (0.06 vs. 0.15). - -![](images/SieveDiagram-Titanic.png) - -Pairs of attributes with interesting associations have a strong shading, such as the diagram shown in the above snapshot. For contrast, a sieve diagram of the least interesting pair (age vs. survival) is shown below. - -![](images/SieveDiagram-Titanic-age-survived.png) - -Example -------- - -Below, we see a simple schema using the *Titanic* dataset, where we use the -[Rank](../data/rank.md) widget to select the best attributes (the ones with the highest information gain, gain ratio or Gini index) and feed them into the **Sieve Diagram**. This displays the sieve plot for the two best attributes, which in our case are sex and status. We see that the survival rate on the Titanic was very high for women of the first class and very low for female crew members. - -![](images/SieveDiagram-Example2.PNG) - -The **Sieve Diagram** also features the *Score Combinations* option, which makes the ranking of attributes even easier. - -![](images/SieveDiagram-Example1.PNG) - -References ----------- - -Riedwyl, H., and Schüpbach, M. (1994). Parquet diagram to plot contingency tables. In Softstat '93: Advances in Statistical Software, F. Faulbaum (Ed.). New York: Gustav Fischer, 293-299. 
diff --git a/doc/visual-programming/source/widgets/visualize/silhouetteplot.md b/doc/visual-programming/source/widgets/visualize/silhouetteplot.md deleted file mode 100644 index 021e768093e..00000000000 --- a/doc/visual-programming/source/widgets/visualize/silhouetteplot.md +++ /dev/null @@ -1,40 +0,0 @@ - -Silhouette Plot -=============== - -A graphical representation of consistency within clusters of data. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -The **Silhouette Plot** widget offers a graphical representation of consistency within clusters of data and provides the user with the means to visually assess cluster quality. The silhouette score is a measure of how similar an object is to its own cluster in comparison to other clusters and is crucial in the creation of a silhouette plot. The silhouette score close to 1 indicates that the data instance is close to the center of the cluster and instances possessing the silhouette scores close to 0 are on the border between two clusters. - -![](images/SilhouettePlot.png) - -1. Choose the distance metric. You can choose between: - - [Euclidean](https://en.wikipedia.org/wiki/Euclidean_distance) ("straight line" distance between two points) - - [Manhattan](https://en.wiktionary.org/wiki/Manhattan_distance) (the sum of absolute differences for all attributes) - - [Cosine](https://en.wiktionary.org/wiki/Cosine_similarity) (1 - cosine of the angle between two vectors) -2. Select the cluster label. You can decide whether to group the instances by cluster or not. -3. Display options: - - *Choose bar width*. - - *Annotations*: annotate the silhouette plot. -4. If *Send automatically* is ticked, changes are communicated automatically. Alternatively, press *Send* -5. The created silhouette plot shows the silhouette score for each instance in the data. 
- The group's silhouette score is beside the group's name in the parenthesis. -6. Access help, save image, produce a report. On the right, the information on input and output are shown. - -Example -------- - -In the snapshot below, we have decided to use the **Silhouette Plot** on the *iris* dataset. We selected data instances with low silhouette scores and passed them on as a subset to the [Scatter Plot](../visualize/scatterplot.md) widget. This visualization only confirms the accuracy of the **Silhouette Plot** widget, as you can clearly see that the subset lies in the border between two clusters. - -![](images/SilhouettePlot-Example.png) - -If you are interested in other uses of the **Silhouette Plot** widget, feel free to explore our [blog post](http://blog.biolab.si/2016/03/23/all-i-see-is-silhouette/). diff --git a/doc/visual-programming/source/widgets/visualize/treeviewer.md b/doc/visual-programming/source/widgets/visualize/treeviewer.md deleted file mode 100644 index 83c07bc4236..00000000000 --- a/doc/visual-programming/source/widgets/visualize/treeviewer.md +++ /dev/null @@ -1,53 +0,0 @@ -Tree Viewer -=========== - -A visualization of classification and regression trees. - -**Inputs** - -- Tree: decision tree - -**Outputs** - -- Selected Data: instances selected from the tree node -- Data: data with an additional column showing whether a point is selected - -This is a versatile widget with 2-D visualization of [classification and regression trees](https://en.wikipedia.org/wiki/Decision_tree_learning). The user can select a node, instructing the widget to output the data associated with the node, thus enabling explorative data analysis. - -![](images/TreeViewer-stamped.png) - -1. Information on the input. -2. Display options: - - Zoom in and zoom out - - Select the tree width. The nodes display information bubbles when hovering over them. - - Select the depth of your tree. - - Select edge width. 
The edges between the nodes in the tree graph are drawn based on the selected edge width. - - All the edges will be of equal width if *Fixed* is chosen. - - When *Relative to root* is selected, the width of the edge will - correspond to the proportion of instances in the corresponding - node with respect to all the instances in the training data. Under - this selection, the edge will get thinner and thinner when - traversing toward the bottom of the tree. - - *Relative to parent* makes the edge width correspond to the proportion - of instances in the nodes with respect to the instances in their - parent node. - - Define the target class, which you can change based on classes in the data. -3. Press *Save image* to save the created tree graph to your computer as a *.svg* or *.png* file. -4. Produce a report. - -Examples --------- - -Below, is a simple classification schema, where we have read the data, constructed the decision tree and viewed it in our **Tree Viewer**. If both the viewer and [Tree](../model/tree.md) are open, any re-run of the tree induction algorithm will immediately affect the visualization. You can thus use this combination to explore how the parameters of the induction algorithm influence the structure of the resulting tree. - -![](images/TreeViewer-classification.png) - -Clicking on any node will output the related data instances. This is explored in the schema below that shows the subset in the data table and in the [Scatter Plot](../visualize/scatterplot.md). Make sure that the tree data is passed as a data subset; this can be done by connecting the **Scatter Plot** to the [File](../data/file.md) widget first, and connecting it to the **Tree Viewer** widget next. Selected data will be displayed as bold dots. - -**Tree Viewer** can also export labeled data. Connect [Data Table](../data/datatable.md) to **Tree Viewer** and set the link between widgets to *Data* instead of *Selected Data*. 
This will send the entire data to **Data Table** with an additional meta column labeling selected data instances (*Yes* for selected and *No* for the remaining). - -![](images/TreeViewer-selection.png) - -Finally, **Tree Viewer** can be used also for visualizing regression trees. Connect [Random Forest](../model/randomforest.md) to [File](../data/file.md) widget using *housing.tab* dataset. Then connect [Pythagorean Forest](../visualize/pythagoreanforest.md) to **Random Forest**. In **Pythagorean Forest** select a regression tree you wish to further analyze and pass it to the **Tree Viewer**. The widget will display the constructed tree. For visualizing larger trees, especially for regression, [Pythagorean Tree](../visualize/pythagoreantree.md) could be a better option. - -![](images/TreeViewer-regression.png) diff --git a/doc/visual-programming/source/widgets/visualize/venndiagram.md b/doc/visual-programming/source/widgets/visualize/venndiagram.md deleted file mode 100644 index bc6b33ab2d9..00000000000 --- a/doc/visual-programming/source/widgets/visualize/venndiagram.md +++ /dev/null @@ -1,40 +0,0 @@ -Venn Diagram -============ - -Plots a [Venn diagram](http://en.wikipedia.org/wiki/Venn_diagram) for two or more data subsets. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: entire data with a column indicating whether an instance was selected or not - -The **Venn Diagram** widget displays logical relations between datasets by showing the number of common data instances (rows) or the number of shared features (columns). Selecting a part of the visualization outputs the corresponding instances or features. - -![](images/venn-workflow.png) - -![](images/VennDiagram-stamped.png) - -1. Select whether to count common features or instances. -2. Select whether to include duplicates or to output only unique rows; applicable only when matching instances by values of variables. 
- -Rows can be matched -- by their identity, e.g. rows from different data sets match if they came from the same row in a file, -- by equality, if all tables contain the same variables, -- or by values of a string variable that appears in all tables. - -Examples --------- - -The easiest way to use the **Venn Diagram** is to select data subsets and find matching instances in the visualization. We use the *breast-cancer* dataset to select two subsets with [Select Rows](../data/selectrows.md) widget - the first subset is that of breast cancer patients aged between 40 and 49 and the second is that of patients with a tumor size between 20 and 29. The **Venn Diagram** helps us find instances that correspond to both criteria, which can be found in the intersection of the two circles. - -![](images/VennDiagram-Example1.png) - -The **Venn Diagram** widget can also be used for exploring different prediction models. In the following example, we analysed 3 prediction methods, namely [Naive Bayes](../model/naivebayes.md), [SVM](../model/svm.md) and [Random Forest](../model/randomforest.md), according to their misclassified instances. - -By selecting misclassifications in the three [Confusion Matrix](../evaluate/confusionmatrix.md) widgets and sending them to Venn diagram, we can see all the misclassification instances visualized per method used. Then we open **Venn Diagram** and select, for example, the misclassified instances that were identified by all three methods. This is represented as an intersection of all three circles. Click on the intersection to see these two instances marked in the [Scatter Plot](../visualize/scatterplot.md) widget. Try selecting different diagram sections to see how the scatter plot visualization changes. 
- -![](images/VennDiagram-Example2.png) diff --git a/doc/visual-programming/source/widgets/visualize/violinplot.md b/doc/visual-programming/source/widgets/visualize/violinplot.md deleted file mode 100644 index 6ee61781b1f..00000000000 --- a/doc/visual-programming/source/widgets/visualize/violinplot.md +++ /dev/null @@ -1,48 +0,0 @@ -Violin Plot -=========== - -Visualize the distribution of feature values in a violin plot. - -**Inputs** - -- Data: input dataset - -**Outputs** - -- Selected Data: instances selected from the plot -- Data: data with an additional column showing whether a point is selected - -The **Violin Plot** widget plays a similar role as a [Box Plot](boxplot.md). It shows the distribution of quantitative data across several levels of a categorical variable such that those distributions can be compared. Unlike the Box Plot, in which all of the plot components correspond to actual data points, the Violin Plot features a kernel density estimation of the underlying distribution. - -![](images/ViolinPlot-stamped.png) - - -1. Select the variable you want to plot. Tick *Order by relevance to subgroups* to order variables by Chi2 or ANOVA over the selected subgroup. -2. Choose *Subgroups* to see [violin plots](https://en.wikipedia.org/wiki/Violin_plot) displayed by a discrete subgroup. Tick *Order by relevance to variable* to order subgroups by Chi2 or ANOVA over the selected variable. -3. *Box plot*: Tick to show the underlying box plot. - ![](images/ViolinPlot-boxplot.png) - - *Density dots*: Tick to show the underlying data represented by points. - - *Density lines*: Tick to show the underlying data represented by lines. - - *Order subgroups*: Tick to order violins by *median* (ascending). - - *Show grid*: Tick to show a horizontal grid on the graph. - - *Orientation*: Determine violin orientation. - -4. *Kernel*: Select the [kernel](https://en.wikipedia.org/wiki/Kernel_(statistics)) used to estimate the density. 
Possible kernels are: [*Normal*](https://en.wikipedia.org/wiki/Normal_distribution), [*Epanechnikov*](https://en.wikipedia.org/wiki/Epanechnikov_distribution) and *Linear*. - - *Scale*: Select the method used to scale the width of each violin. If *area* is selected, each violin will have the same area. If *count* is selected, the width of the violins will be scaled by the number of observations in that bin. If *width* is selected, each violin will have the same width. - -Examples --------- - -The **Violin Plot** widget is most commonly used immediately after the [File](../data/file.md) widget to observe the statistical properties of a dataset. In the first example, we have used *heart-disease* data to inspect our variables. - -![](images/ViolinPlot-example1.png) - -The **Violin Plot** could also be used for *outlier detection*. In the next example we eliminate the outliers by selecting only instances that fall inside the [Q1 − 1.5 and Q3 + 1.5 IQR](https://en.wikipedia.org/wiki/Interquartile_range). - -![](images/ViolinPlot-example2.png)