Merge branch 'main' into 756-add-notebook-for-eulaw

dianna-ai · Oct 9, 2024 · f762245 · f762245
2 parents 3f3e384 + 5b8922d
commit f762245
Show file tree

Hide file tree

Showing 15 changed files with 744 additions and 533 deletions.
diff --git a/README.md b/README.md
@@ -197,9 +197,11 @@ The default hyperparameters used in DIANNA for each explainer as well as the val
 Explore the explanations of your trained model using the DIANNA dashboard (for now images, text and time series classification is supported).
 [Click here](https://github.com/dianna-ai/dianna/tree/main/dianna/dashboard) for more information.
 
-<a href="https://github.com/dianna-ai/dianna/tree/main/dianna/dashboard" target="_blank">
-  <img width="1000" align="center" alt="Dianna dashboard screenshot" src="https://raw.githubusercontent.com/dianna-ai/dianna/main/dianna/dashboard/dashboard-screenshot.png">
+_Dianna dashboard screenshot here_
+<!-- <a href="https://github.com/dianna-ai/dianna/tree/main/dianna/dashboard" target="_blank">
+   <img width="1000" align="center" alt="Dianna dashboard screenshot" src="https://raw.githubusercontent.com/dianna-ai/dianna/main/dianna/dashboard/dashboard-screenshot.png">
 </a>
+-->
 
 ## Datasets
 

diff --git a/dianna/dashboard/Home.py b/dianna/dashboard/Home.py
@@ -38,20 +38,64 @@
 
 # Display the content of the selected page
 if selected == "Home":
-    st.image(str(data_directory / 'logo.png'))
 
-    st.markdown("""
-    DIANNA is a Python package that brings explainable AI (XAI) to your research project.
-    It wraps carefully selected XAI methods in a simple, uniform interface. It's built by,
-    with and for (academic) researchers and research software engineers working on machine
-    learning projects.
+    _, col, _ = st.columns([1, 3, 1])
+    with col:
+        st.markdown("""#""")
 
-    ### More information
 
-    - [Source code](https://github.com/dianna-ai/dianna)
-    - [Documentation](https://dianna.readthedocs.io/)
-    """,
-                unsafe_allow_html=True)
+        st.image(str(data_directory / 'logo.png'), width = 360)
+
+        st.markdown("""
+        **DIANNA** (Deep Insight And Neural Network Analysis) is a Python package that brings explainable AI (XAI)
+        to your research project. <br>
+        It wraps _systematically_ selected XAI methods (**explainers**) in a simple, uniform interface.<br>
+        The currently supported explainers are [RISE](http://bmvc2018.org/contents/papers/1064.pdf),
+        [LIME](https://www.kdd.org/kdd2016/papers/files/rfp0573-ribeiroA.pdf) and
+        [KernelSHAP](https://proceedings.neurips.cc/paper/2017/file/8a20a8621978632d76c43dfd28b67767-Paper.pdf).<br>
+        It's built by, with, and for academic researchers and research software engineers
+        who use AI, but users do not need to be XAI experts! <br>
+        DIANNA supports the de-facto standard format of neural network models - [ONNX](https://onnx.ai/).
+
+        ### Dashboard
+        The DIANNA dashboard can be used to visualise the explanation of the outcomes of several ONNX models
+        trained for the tasks and datasets presented
+        in the [DIANNA Tutorials](https://github.com/dianna-ai/dianna/tree/main/tutorials#datasets-and-tasks).
+
+        The dashboard shows the visual explanation of a model's outcome
+        on a selected data _instance_ by one or more selected explainers. <br>
+        It allows you to compare the results of different explainers, as well as explanations
+        of the top ranked predicted model outcomes.
+
+        There are separate sections for each of the different _data modalities_ supported by DIANNA:
+        :gray-background[**Image**], :gray-background[**Text**],
+        :gray-background[**Tabular**], and :gray-background[**Time series**] data. <br>
+        The visual explanation is a :rainbow-background[**heatmap**] overlaid on the data instance,
+        highlighting the relevance (attribution) of each data instance _element_ to a selected model's outcome.<br>
+        The data element for images is a (super)pixel, for text a word, for tabular data an attribute,
+        and for time-series a time interval. Attributions can be positive, negative or irrelevant.<br>
+        The dashboard uses the _bwr  (blue white red)_ colormap assigning :blue[**blue**] color to negative
+        relevances, **white** color to near-zero values, and :red[**red**] color to positive values.
+
+        """,
+                    unsafe_allow_html=True)
+
+        st.image(str(data_directory / 'colormap.png'), width = 660)
+
+        st.markdown("""
+        The dashboard _primarily_ illustrates the examples from the DIANNA tutorials.
+
+        It is also possible to upload your _own_ trained (ONNX) model, the task-specific class labels,
+        and the data instance for which you would like the model's decision explanation.<br>
+        You can then select the explainer you want to use and set its hyperparameters.
+
+        ### More information
+
+        - [Source code](https://github.com/dianna-ai/dianna)
+        - [Documentation](https://dianna.readthedocs.io/)
+        - [XAI choice](https://blog.esciencecenter.nl/how-to-find-your-artificial-intelligence-explainer-dbb1ac608009)
+        """,
+                    unsafe_allow_html=True)
 
 else:
     # Dynamically import and execute the page

diff --git a/dianna/dashboard/pages/Images.py b/dianna/dashboard/pages/Images.py
@@ -1,3 +1,5 @@
+import base64
+import sys
 import streamlit as st
 from _image_utils import open_image
 from _model_utils import load_labels
@@ -12,9 +14,44 @@
 from dianna.utils.downloader import download
 from dianna.visualization import plot_image
 
+if sys.version_info < (3, 10):
+    from importlib_resources import files
+else:
+    from importlib.resources import files
+
+data_directory = files('dianna.data')
+colormap_path = str(data_directory / 'colormap.png')
+with open(colormap_path, "rb") as img_file:
+    colormap = base64.b64encode(img_file.read()).decode()
+
+def description_explainer(open='open'):
+    """Expandable text section with image."""
+    return (st.markdown(
+            f"""
+            <details {open}>
+            <summary><b>Description of the explanation</b></summary>
+
+            The explanation is visualised as a **relevance heatmap** overlaid on top of the image. <br>
+            The heatmap consists of the relevance _attributions_ of all individual pixels/super-pixels of the image
+            to a **pretrained model**'s classification. <br>
+            The attribution heatmap can be computed for any class. <br><br>
+
+            The _bwr (blue white red)_ attribution colormap
+            assigns :blue[**blue**] color to negative relevances, **white** color to near-zero values,
+            and :red[**red**] color to positive values. <br><br>
+
+            <img src="data:image/png;base64,{colormap}" alt="Colormap" width="600" ><br>
+            </details>
+            """,
+            unsafe_allow_html=True
+           ),
+           st.text("")
+           )
+
+
 add_sidebar_logo()
 
-st.title('Image explanation')
+st.title('Explaining Image data classification')
 
 st.sidebar.header('Input data')
 
@@ -43,17 +80,20 @@
 
         imagekey = 'Digits_Image_cb'
 
+        description_explainer("")
         st.markdown(
             """
-            This example demonstrates the use of DIANNA on a pretrained binary
-            [MNIST](https://yann.lecun.com/exdb/mnist/) model using a hand-written digit images.
-            The model predict for an image of a hand-written 0 or 1, which of the two it most
-            likely is.
-            This example visualizes the relevance attributions for each pixel/super-pixel by
-            displaying them on top of the input image.
-            """
+            *********************************************************************************************
+            This example demonstrates the use of DIANNA on explaining a
+            [**binary MNIST model**](https://zenodo.org/records/5907177) pretrained on **only** images of
+            the hand-written digits 0 and 1. <br>
+            The model classifies an image of a hand-written digit as displaying 0 or 1.
+            """,
+            unsafe_allow_html=True
         )
+
     else:
+        description_explainer()
         st.info('Select an example in the left panel to coninue')
         st.stop()
 
@@ -75,14 +115,18 @@
 
     imagekey = 'Image_cb'
 
+    if not (image_file and image_model_file and image_label_file):
+        description_explainer()
+        st.info('Add your input data in the left panel to continue')
+        st.stop()
+    else:
+        description_explainer("")
+
 if input_type is None:
+    description_explainer()
     st.info('Select which input type to use in the left panel to continue')
     st.stop()
 
-if not (image_file and image_model_file and image_label_file):
-    st.info('Add your input data in the left panel to continue')
-    st.stop()
-
 image, _ = open_image(image_file)
 
 model = load_model(image_model_file)

diff --git a/dianna/dashboard/pages/Tabular.py b/dianna/dashboard/pages/Tabular.py
@@ -22,7 +22,30 @@
 
 add_sidebar_logo()
 
-st.title('Tabular data explanation')
+def description_explainer(open='open'):
+    """Expandable text section with image."""
+    return (st.markdown(
+            f"""
+            <details {open}>
+            <summary><b>Description of the explanation</b></summary>
+
+            The explanation is visualised as a **relevance bar-chart** for the top (up to 10) most
+            relevant _attributes (features)_. <br>
+            The chart displays the relevance _attributions_ of the individual features of the tabular data
+            to a **pretrained model**'s classification or regression prediction.
+            The attribution chart can be computed for any predicted outcome.
+
+            The attribution colormap
+            assigns :blue[**blue**] color to negative relevances,
+            and :red[**red**] color to positive values.
+            </details>
+            """,
+            unsafe_allow_html=True
+           ),
+           st.text("")
+           )
+
+st.title('Explaining Tabular data classification/regression')
 
 st.sidebar.header('Input data')
 
@@ -38,12 +61,12 @@
 if input_type == 'Use an example':
     load_example = st.sidebar.radio(
         label='Use example',
-        options=('Sunshine hours prediction', 'Penguin identification'),
+        options=('Sunshine hours prediction (regression)', 'Penguin identification (classification)'),
         index = None,
         on_change = reset_method,
         key='Tabular_load_example')
 
-    if load_example == "Sunshine hours prediction":
+    if load_example == "Sunshine hours prediction (regression)":
         tabular_data_file = download('weather_prediction_dataset_light.csv', 'data')
         tabular_model_file = download('sunshine_hours_regression_model.onnx', 'model')
         tabular_training_data_file = tabular_data_file
@@ -53,45 +76,44 @@
         labels =  None
 
         mode = 'regression'
+        description_explainer("")
         st.markdown(
         """
-        This example demonstrates the use of DIANNA on a pre-trained regression
-        [model to predict tomorrow's sunshine hours](https://zenodo.org/records/10580833)
+        *****************************************************************************
+        This example demonstrates the use of DIANNA on a pre-trained [regression
+        model](https://zenodo.org/records/10580833) to predict tomorrow's sunshine hours
         based on meteorological data from today.
         The model is trained on the
-        [weather prediction dataset](https://zenodo.org/records/5071376).
-        The meteorological data includes for various European cities the
-        cloud coverage,humidity, air pressure, global radiation, precipitation, and
-        mean, min and max temeprature.
-
-        DIANNA's visualisation shows the top most important features contributing to the
-        sunshine hours prediction, where features contrinuting positively are indicated in red
-        and those who contribute negatively in blue.
-        """)
-    elif load_example == 'Penguin identification':
+        [weather prediction dataset](https://zenodo.org/records/5071376). <br>
+        The meteorological data includes measurements (features) of
+        _cloud coverage, humidity, air pressure, global radiation, precipitation_, and
+        _mean, min_ and _max temperature_
+        for various European cities.
+        """,
+        unsafe_allow_html=True )
+
+    elif load_example == 'Penguin identification (classification)':
         tabular_model_file = download('penguin_model.onnx', 'model')
         data_penguins = sns.load_dataset('penguins')
         labels = data_penguins['species'].unique()
 
         training_data, data = load_penguins(data_penguins)
 
         mode = 'classification'
-
+        description_explainer("")
         st.markdown(
         """
-        This example demonstrates the use of DIANNA on a pre-trained classification
-        [model to classify penguins in to three different species](https://zenodo.org/records/10580743)
-        based on a number of measurable physical characteristics.
+        ****************************************************************************
+        This example demonstrates the use of DIANNA on a pre-trained [classification
+        model](https://zenodo.org/records/10580743) to identify if a penguin belongs to one of three different species
+        based on a number of measurable physical characteristics. <br>
         The model is trained on the
-        [weather prediction dataset](https://zenodo.org/records/5071376). The data is obtained from
-        the Python seaborn package
-        The penguin characteristics include the bill length, bill depth, flipper length and body mass.
-
-        DIANNA's visualisation shows the top most important characteristics contributing to the
-        penguin species classification, where characteristics contributing positively are indicated in red
-        and those who contribute negatively in blue.
-        """)
+        [penguin dataset](https://www.kaggle.com/code/parulpandey/penguin-dataset-the-new-iris).
+        The penguin characteristics include the _bill length_, _bill depth_, _flipper length_, and _body mass_.
+        """,
+        unsafe_allow_html=True)
     else:
+        description_explainer()
         st.info('Select an example in the left panel to coninue')
         st.stop()
 
@@ -103,8 +125,11 @@
     tabular_label_file = st.sidebar.file_uploader('Select labels in case of classification model', type='txt')
 
     if not (tabular_data_file and tabular_model_file and tabular_training_data_file):
+        description_explainer()
         st.info('Add your input data in the left panel to continue')
         st.stop()
+    else:
+        description_explainer("")
 
     data = load_data(tabular_data_file)
     model = load_model(tabular_model_file)
@@ -118,6 +143,7 @@
         mode = 'regression'
 
 if input_type is None:
+    description_explainer()
     st.info('Select which input type to use in the left panel to continue')
     st.stop()
 
@@ -126,7 +152,6 @@
 
 choices = ('RISE', 'LIME', 'KernelSHAP')
 
-st.text("")
 st.text("")
 
 # Get predictions and create parameter box