diff --git a/project/00_5_training_data_exploration.py b/project/00_5_training_data_exploration.py
index 780ccec41..d30fcc489 100644
--- a/project/00_5_training_data_exploration.py
+++ b/project/00_5_training_data_exploration.py
@@ -22,18 +22,7 @@
 # -
 #
 # Does not save filtered data, this is done by splitting notebook. Only visualisations.
-#
-# Expected current format:
-# - wide format (samples x features)
-# > not the default output in MS-based proteomics
-#
-# An example of peptides in wide format would be:
-#
-# | Sample ID | pep A | pep B | pep C | ... |
-# | --- | --- | --- | --- | --- |
-# | sample_01 | 0.1       | 0.2       | 0.3       | ... |
-# | sample_02 | 0.2       | NA       | 0.4       | ... |
-# | sample_03 | 0.3       | 0.2       | 0.1       | ... |
+
 
 # %%
 from __future__ import annotations
@@ -53,6 +42,7 @@
 from vaep.pandas import missing_data
 import vaep.data_handling
 from vaep.analyzers import analyzers
+from vaep.utils import create_random_df
 
 logger = vaep.logging.setup_nb_logger()
 logging.getLogger('fontTools').setLevel(logging.WARNING)
@@ -126,6 +116,16 @@ def get_dynamic_range(min_max):
     return dynamic_range
 
 
+# %% [markdown]
+# Currently expected format:
+# - wide format (samples x features)
+# > Note: this is not the default output format in MS-based proteomics.
+#
+# An example of peptides in wide format would be:
+
+# %%
+create_random_df(5, 8, prop_na=.2)  # presumably 5 samples x 8 features, ~20% NA; TODO confirm
+
 # %% [markdown]
 # ## Parameters