diff --git a/Rplot001.jpg b/Rplot001.jpg
new file mode 100644
index 0000000..e69de29
diff --git a/_site/omics/week-5/figures/frog-s30-pca.png b/_site/omics/week-5/figures/frog-s30-pca.png
new file mode 100644
index 0000000..f4f1799
Binary files /dev/null and b/_site/omics/week-5/figures/frog-s30-pca.png differ
diff --git a/_site/omics/week-5/figures/prog-hspc-volcano.png b/_site/omics/week-5/figures/prog-hspc-volcano.png
new file mode 100644
index 0000000..4122bc5
Binary files /dev/null and b/_site/omics/week-5/figures/prog-hspc-volcano.png differ
diff --git a/_site/omics/week-5/figures/prog_hspc-pca.png b/_site/omics/week-5/figures/prog_hspc-pca.png
new file mode 100644
index 0000000..315fc8d
Binary files /dev/null and b/_site/omics/week-5/figures/prog_hspc-pca.png differ
diff --git a/_site/omics/week-5/images/Xenbase-Logo-Medium.png b/_site/omics/week-5/images/Xenbase-Logo-Medium.png
new file mode 100644
index 0000000..2121bb0
Binary files /dev/null and b/_site/omics/week-5/images/Xenbase-Logo-Medium.png differ
diff --git a/_site/omics/week-5/images/frog-heat.png b/_site/omics/week-5/images/frog-heat.png
new file mode 100644
index 0000000..8cf1b5f
Binary files /dev/null and b/_site/omics/week-5/images/frog-heat.png differ
diff --git a/_site/omics/week-5/images/volcano-why.png b/_site/omics/week-5/images/volcano-why.png
new file mode 100644
index 0000000..26b786b
Binary files /dev/null and b/_site/omics/week-5/images/volcano-why.png differ
diff --git a/_site/omics/week-5/images/why_pca_frog.png b/_site/omics/week-5/images/why_pca_frog.png
new file mode 100644
index 0000000..14bb9f6
Binary files /dev/null and b/_site/omics/week-5/images/why_pca_frog.png differ
diff --git a/_site/omics/week-5/images/why_pca_mouse.png b/_site/omics/week-5/images/why_pca_mouse.png
new file mode 100644
index 0000000..04ef843
Binary files /dev/null and b/_site/omics/week-5/images/why_pca_mouse.png differ
diff --git a/_site/omics/week-5/meta/xenbase_info.xlsx b/_site/omics/week-5/meta/xenbase_info.xlsx
new file mode 100644
index 0000000..940c933
Binary files /dev/null and b/_site/omics/week-5/meta/xenbase_info.xlsx differ
diff --git a/_site/omics/week-5/study_before_workshop.html b/_site/omics/week-5/study_before_workshop.html
new file mode 100644
index 0000000..ed3f7a3
--- /dev/null
+++ b/_site/omics/week-5/study_before_workshop.html
@@ -0,0 +1,1123 @@
+<!DOCTYPE html>
+<html lang="en"><head>
+<script src="../../site_libs/clipboard/clipboard.min.js"></script>
+<script src="../../site_libs/quarto-html/tabby.min.js"></script>
+<script src="../../site_libs/quarto-html/popper.min.js"></script>
+<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
+<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
+<link href="../../site_libs/quarto-html/quarto-html.min.css" rel="stylesheet" data-mode="light">
+<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles"><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta charset="utf-8">
+<meta name="generator" content="quarto-1.2.269">
+<meta name="author" content="Emma Rand">
+<title>Data Analysis for Group Project - Independent Study to prepare for workshop</title>
+<meta name="apple-mobile-web-app-capable" content="yes">
+<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
+<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
+<link rel="stylesheet" href="../../site_libs/revealjs/dist/reset.css">
+<link rel="stylesheet" href="../../site_libs/revealjs/dist/reveal.css">
+<style>
+    code{white-space: pre-wrap;}
+    span.smallcaps{font-variant: small-caps;}
+    div.columns{display: flex; gap: min(4vw, 1.5em);}
+    div.column{flex: auto; overflow-x: auto;}
+    div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
+    ul.task-list{list-style: none;}
+    ul.task-list li input[type="checkbox"] {
+      width: 0.8em;
+      margin: 0 0.8em 0.2em -1.6em;
+      vertical-align: middle;
+    }
+    pre > code.sourceCode { white-space: pre; position: relative; }
+    pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
+    pre > code.sourceCode > span:empty { height: 1.2em; }
+    .sourceCode { overflow: visible; }
+    code.sourceCode > span { color: inherit; text-decoration: inherit; }
+    div.sourceCode { margin: 1em 0; }
+    pre.sourceCode { margin: 0; }
+    @media screen {
+    div.sourceCode { overflow: auto; }
+    }
+    @media print {
+    pre > code.sourceCode { white-space: pre-wrap; }
+    pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+    }
+    pre.numberSource code
+      { counter-reset: source-line 0; }
+    pre.numberSource code > span
+      { position: relative; left: -4em; counter-increment: source-line; }
+    pre.numberSource code > span > a:first-child::before
+      { content: counter(source-line);
+        position: relative; left: -1em; text-align: right; vertical-align: baseline;
+        border: none; display: inline-block;
+        -webkit-touch-callout: none; -webkit-user-select: none;
+        -khtml-user-select: none; -moz-user-select: none;
+        -ms-user-select: none; user-select: none;
+        padding: 0 4px; width: 4em;
+        color: #aaaaaa;
+      }
+    pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
+    div.sourceCode
+      { color: #003b4f; background-color: #f1f3f5; }
+    @media screen {
+    pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+    }
+    code span { color: #003b4f; } /* Normal */
+    code span.al { color: #ad0000; } /* Alert */
+    code span.an { color: #5e5e5e; } /* Annotation */
+    code span.at { color: #657422; } /* Attribute */
+    code span.bn { color: #ad0000; } /* BaseN */
+    code span.bu { } /* BuiltIn */
+    code span.cf { color: #003b4f; } /* ControlFlow */
+    code span.ch { color: #20794d; } /* Char */
+    code span.cn { color: #8f5902; } /* Constant */
+    code span.co { color: #5e5e5e; } /* Comment */
+    code span.cv { color: #5e5e5e; font-style: italic; } /* CommentVar */
+    code span.do { color: #5e5e5e; font-style: italic; } /* Documentation */
+    code span.dt { color: #ad0000; } /* DataType */
+    code span.dv { color: #ad0000; } /* DecVal */
+    code span.er { color: #ad0000; } /* Error */
+    code span.ex { } /* Extension */
+    code span.fl { color: #ad0000; } /* Float */
+    code span.fu { color: #4758ab; } /* Function */
+    code span.im { color: #00769e; } /* Import */
+    code span.in { color: #5e5e5e; } /* Information */
+    code span.kw { color: #003b4f; } /* Keyword */
+    code span.op { color: #5e5e5e; } /* Operator */
+    code span.ot { color: #003b4f; } /* Other */
+    code span.pp { color: #ad0000; } /* Preprocessor */
+    code span.sc { color: #5e5e5e; } /* SpecialChar */
+    code span.ss { color: #20794d; } /* SpecialString */
+    code span.st { color: #20794d; } /* String */
+    code span.va { color: #111111; } /* Variable */
+    code span.vs { color: #20794d; } /* VerbatimString */
+    code span.wa { color: #5e5e5e; font-style: italic; } /* Warning */
+    div.csl-bib-body { }
+    div.csl-entry {
+      clear: both;
+    }
+    .hanging div.csl-entry {
+      margin-left:2em;
+      text-indent:-2em;
+    }
+    div.csl-left-margin {
+      min-width:2em;
+      float:left;
+    }
+    div.csl-right-inline {
+      margin-left:2em;
+      padding-left:1em;
+    }
+    div.csl-indent {
+      margin-left: 2em;
+    }
+  </style>
+<link rel="stylesheet" href="../../site_libs/revealjs/dist/theme/quarto.css" id="theme">
+<link href="../../site_libs/revealjs/plugin/quarto-line-highlight/line-highlight.css" rel="stylesheet">
+<link href="../../site_libs/revealjs/plugin/reveal-menu/menu.css" rel="stylesheet">
+<link href="../../site_libs/revealjs/plugin/reveal-menu/quarto-menu.css" rel="stylesheet">
+<link href="../../site_libs/revealjs/plugin/reveal-chalkboard/font-awesome/css/all.css" rel="stylesheet">
+<link href="../../site_libs/revealjs/plugin/reveal-chalkboard/style.css" rel="stylesheet">
+<link href="../../site_libs/revealjs/plugin/quarto-support/footer.css" rel="stylesheet">
+<style type="text/css">
+
+  .callout {
+    margin-top: 1em;
+    margin-bottom: 1em;  
+    border-radius: .25rem;
+  }
+
+  .callout.callout-style-simple { 
+    padding: 0em 0.5em;
+    border-left: solid #acacac .3rem;
+    border-right: solid 1px silver;
+    border-top: solid 1px silver;
+    border-bottom: solid 1px silver;
+    display: flex;
+  }
+
+  .callout.callout-style-default {
+    border-left: solid #acacac .3rem;
+    border-right: solid 1px silver;
+    border-top: solid 1px silver;
+    border-bottom: solid 1px silver;
+  }
+
+  .callout .callout-body-container {
+    flex-grow: 1;
+  }
+
+  .callout.callout-style-simple .callout-body {
+    font-size: 1rem;
+    font-weight: 400;
+  }
+
+  .callout.callout-style-default .callout-body {
+    font-size: 0.9rem;
+    font-weight: 400;
+  }
+
+  .callout.callout-captioned.callout-style-simple .callout-body {
+    margin-top: 0.2em;
+  }
+
+  .callout:not(.callout-captioned) .callout-body {
+      display: flex;
+  }
+
+  .callout:not(.no-icon).callout-captioned.callout-style-simple .callout-content {
+    padding-left: 1.6em;
+  }
+
+  .callout.callout-captioned .callout-header {
+    padding-top: 0.2em;
+    margin-bottom: -0.2em;
+  }
+
+  .callout.callout-captioned .callout-caption  p {
+    margin-top: 0.5em;
+    margin-bottom: 0.5em;
+  }
+    
+  .callout.callout-captioned.callout-style-simple .callout-content  p {
+    margin-top: 0;
+  }
+
+  .callout.callout-captioned.callout-style-default .callout-content  p {
+    margin-top: 0.7em;
+  }
+
+  .callout.callout-style-simple div.callout-caption {
+    border-bottom: none;
+    font-size: .9rem;
+    font-weight: 600;
+    opacity: 75%;
+  }
+
+  .callout.callout-style-default  div.callout-caption {
+    border-bottom: none;
+    font-weight: 600;
+    opacity: 85%;
+    font-size: 0.9rem;
+    padding-left: 0.5em;
+    padding-right: 0.5em;
+  }
+
+  .callout.callout-style-default div.callout-content {
+    padding-left: 0.5em;
+    padding-right: 0.5em;
+  }
+
+  .callout.callout-style-simple .callout-icon::before {
+    height: 1rem;
+    width: 1rem;
+    display: inline-block;
+    content: "";
+    background-repeat: no-repeat;
+    background-size: 1rem 1rem;
+  }
+
+  .callout.callout-style-default .callout-icon::before {
+    height: 0.9rem;
+    width: 0.9rem;
+    display: inline-block;
+    content: "";
+    background-repeat: no-repeat;
+    background-size: 0.9rem 0.9rem;
+  }
+
+  .callout-caption {
+    display: flex
+  }
+    
+  .callout-icon::before {
+    margin-top: 1rem;
+    padding-right: .5rem;
+  }
+
+  .callout.no-icon::before {
+    display: none !important;
+  }
+
+  .callout.callout-captioned .callout-body > .callout-content > :last-child {
+    margin-bottom: 0.5rem;
+  }
+
+  .callout.callout-captioned .callout-icon::before {
+    margin-top: .5rem;
+    padding-right: .5rem;
+  }
+
+  .callout:not(.callout-captioned) .callout-icon::before {
+    margin-top: 1rem;
+    padding-right: .5rem;
+  }
+
+  /* Callout Types */
+
+  div.callout-note {
+    border-left-color: #4582ec !important;
+  }
+
+  div.callout-note .callout-icon::before {
+    background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAAEU0lEQVRYCcVXTWhcVRQ+586kSUMMxkyaElstCto2SIhitS5Ek8xUKV2poatCcVHtUlFQk8mbaaziwpWgglJwVaquitBOfhQXFlqlzSJpFSpIYyXNjBNiTCck7x2/8/LeNDOZxDuEkgOXe++553zfefee+/OYLOXFk3+1LLrRdiO81yNqZ6K9cG0P3MeFaMIQjXssE8Z1JzLO9ls20MBZX7oG8w9GxB0goaPrW5aNMp1yOZIa7Wv6o2ykpLtmAPs/vrG14Z+6d4jpbSKuhdcSyq9wGMPXjonwmESXrriLzFGOdDBLB8Y6MNYBu0dRokSygMA/mrun8MGFN3behm6VVAwg4WR3i6FvYK1T7MHo9BK7ydH+1uurECoouk5MPRyVSBrBHMYwVobG2aOXM07sWrn5qgB60rc6mcwIDJtQrnrEr44kmy+UO9r0u9O5/YbkS9juQckLed3DyW2XV/qWBBB3ptvI8EUY3I9p/67OW+g967TNr3Sotn3IuVlfMLVnsBwH4fsnebJvyGm5GeIUA3jljERmrv49SizPYuq+z7c2H/jlGC+Ghhupn/hcapqmcudB9jwJ/3jvnvu6vu5lVzF1fXyZuZZ7U8nRmVzytvT+H3kilYvH09mLWrQdwFSsFEsxFVs5fK7A0g8gMZjbif4ACpKbjv7gNGaD8bUrlk8x+KRflttr22JEMRUbTUwwDQScyzPgedQHZT0xnx7ujw2jfVfExwYHwOsDTjLdJ2ebmeQIlJ7neo41s/DrsL3kl+W2lWvAga0tR3zueGr6GL78M3ifH0rGXrBC2aAR8uYcIA5gwV8zIE8onoh8u0Fca/ciF7j1uOzEnqcIm59sEXoGc0+z6+H45V1CvAvHcD7THztu669cnp+L0okAeIc6zjbM/24LgGM1gZk7jnRu1aQWoU9sfUOuhrmtaPIO3YY1KLLWZaEO5TKUbMY5zx8W9UJ6elpLwKXbsaZ4EFl7B4bMtDv0iRipKoDQT2sNQI9b1utXFdYisi+wzZ/ri/1m7QfDgEuvgUUEIJPq3DhX/5DWNqIXDOweC2wvIR90Oq3lDpdMIgD2r0dXvGdsEW5H6x6HLRJYU7C69VefO1x8Gde1ZFSJLfWS1jbCnhtOPxmpfv2LXOA2Xk2tvnwKKPFuZ/oRmwBwqRQDcKNeVQkYcOjtWVBuM/JuYw5b6isojIkYxyYAFn5K7ZBF10fea52y8QltAg6jnMqNHFBmGkQ1j+U43HMi2xMar1Nv0zGsf1s8nUsmUtPOOrbFIR8bHFDMB5zL13Gmr/kGlCkUzedTzzmzsaJXhYawnA3UmARpiYj5ooJZiUoxFRtK3X6pgNPv+IZVPcnwbOl6f+aBaO1CNvPW9n9LmCp01nuSaTRF2YxHqZ8DYQT6WsXT+RD6eUztwYLZ8rM+rcPxamv1VQzFUkzFXvkiVrySGQgJNvXHJAxiU3/NwiC03rSf05VBaPtu/Z7/B8Yn/w7eguloAAAAAElFTkSuQmCC');
+  }
+
+  div.callout-note.callout-style-default .callout-caption {
+    background-color: #dae6fb
+  }
+
+  div.callout-important {
+    border-left-color: #d9534f !important;
+  }
+
+  div.callout-important .callout-icon::before {
+    background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAAEKklEQVRYCcVXTWhcVRS+575MJym48A+hSRFr00ySRQhURRfd2HYjk2SSTokuBCkU2o0LoSKKraKIBTcuFCoidGFD08nkBzdREbpQ1EDNIv8qSGMFUboImMSZd4/f9zJv8ibJMC8xJQfO3HPPPef7zrvvvnvviIkpC9nsw0UttFunbUhpFzFtarSd6WJkStVMw5xyVqYTvkwfzuf/5FgtkVoB0729j1rjXwThS7Vio+Mo6DNnvLfahoZ+i/o32lULuJ3NNiz7q6+pyAUkJaFF6JwaM2lUJlV0MlnQn5aTRbEu0SEqHUa0A4AdiGuB1kFXRfVyg5d87+Dg4DL6m2TLAub60ilj7A1Ec4odSAc8X95sHh7+ZRPCFo6Fnp7HfU/fBng/hi10CjCnWnJjsxvDNxWw0NfV6Rv5GgP3I3jGWXumdTD/3cbEOP2ZbOZp69yniG3FQ9z1jD7bnBu9Fc2tKGC2q+uAJOQHBDRiZX1x36o7fWBs7J9ownbtO+n0/qWkvW7UPIfc37WgT6ZGR++EOJyeQDSb9UB+DZ1G6DdLDzyS+b/kBCYGsYgJbSQHuThGKRcw5xdeQf8YdNHsc6ePXrlSYMBuSIAFTGAtQo+VuALo4BX83N190NWZWbynBjhOHsmNfFWLeL6v+ynsA58zDvvAC8j5PkbOcXCMg2PZFk3q8MjI7WAG/Dp9AwP7jdGBOOQkAvlFUB+irtm16I1Zw9YBcpGTGXYmk3kQIC/Cds55l+iMI3jqhjAuaoe+am2Jw5GT3Nbz3CkE12NavmzN5+erJW7046n/CH1RO/RVa8lBLozXk9uqykkGAyRXLWlLv5jyp4RFsG5vGVzpDLnIjTWgnRy2Rr+tDKvRc7Y8AyZq10jj8DqXdnIRNtFZb+t/ZRtXcDiVnzpqx8mPcDWxgARUqx0W1QB9MeUZiNrV4qP+Ehc+BpNgATsTX8ozYKL2NtFYAHc84fG7ndxUPr+AR/iQSns7uSUufAymwDOb2+NjK27lEFocm/EE2WpyIy/Hi66MWuMKJn8RvxIcj87IM5Vh9663ziW36kR0HNenXuxmfaD8JC7tfKbrhFr7LiZCrMjrzTeGx+PmkosrkNzW94ObzwocJ7A1HokLolY+AvkTiD/q1H0cN48c5EL8Crkttsa/AXQVDmutfyku0E7jShx49XqV3MFK8IryDhYVbj7Sj2P2eBxwcXoe8T8idsKKPRcnZw1b+slFTubwUwhktrfnAt7J++jwQtLZcm3sr9LQrjRzz6cfMv9aLvgmnAGvpoaGLxM4mAEaLV7iAzQ3oU0IvD5x9ix3yF2RAAuYAOO2f7PEFWCXZ4C9Pb2UsgDeVnFSpbFK7/IWu7TPTvBqzbGdCHOJQSxiEjt6IyZmxQyEJHv6xyQsYk//moVFsN2zP6fRImjfq7/n/wFDguUQFNEwugAAAABJRU5ErkJggg==');
+  }
+
+  div.callout-important.callout-style-default .callout-caption {
+    background-color: #f7dddc
+  }
+
+  div.callout-warning {
+    border-left-color: #f0ad4e !important;
+  }
+
+  div.callout-warning .callout-icon::before {
+    background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAAETklEQVRYCeVWW2gcVRg+58yaTUnizqbipZeX4uWhBEniBaoUX1Ioze52t7sRq6APio9V9MEaoWlVsFasRq0gltaAPuxms8lu0gcviE/FFOstVbSIxgcv6SU7EZqmdc7v9+9mJtNks51NTUH84ed889/PP+cmxP+d5FIbMJmNbpREu4WUkiTtCicKny0l1pIKmBzovF2S+hIJHX8iEu3hZJ5lNZGqyRrGSIQpq15AzF28jgpeY6yk6GVdrfFqdrD6Iw+QlB8g0YS2g7dyQmXM/IDhBhT0UCiRf59lfqmmDvzRt6kByV/m4JjtzuaujMUM2c5Z2d6JdKrRb3K2q6mA+oYVz8JnDdKPmmNthzkAk/lN63sYPgevrguc72aZX/L9C6x09GYyxBgCX4NlvyGUHOKELlm5rXeR1kchuChJt4SSwyddZRXgvwMGvYo4QSlk3/zkHD8UHxwVJA6zjZZqP8v8kK8OWLnIZtLyCAJagYC4rTGW/9Pqj92N/c+LUaAj27movwbi19tk/whRCIE7Q9vyI6yvRpftAKVTdUjOW40X3h5OXsKCdmFcx0xlLJoSuQngnrJe7Kcjm4OMq9FlC7CMmScQANuNvjfP3PjGXDBaUQmbp296S5L4DrpbrHN1T87ZVEZVCzg1FF0Ft+dKrlLukI+/c9ENo+TvlTDbYFvuKPtQ9+l052rXrgKoWkDAFnvh0wTOmYn8R5f4k/jN/fZiCM1tQx9jQQ4ANhqG4hiL0qIFTGViG9DKB7GYzgubnpofgYRwO+DFjh0Zin2m4b/97EDkXkc+f6xYAPX0KK2I/7fUQuwzuwo/L3AkcjugPNixC8cHf0FyPjWlItmLxWw4Ou9YsQCr5fijMGoD/zpdRy95HRysyXA74MWOnscpO4j2y3HAVisw85hX5+AFBRSHt4ShfLFkIMXTqyKFc46xdzQM6XbAi702a7sy04J0+feReMFKp5q9esYLCqAZYw/k14E/xcLLsFElaornTuJB0svMuJINy8xkIYuL+xPAlWRceH6+HX7THJ0djLUom46zREu7tTkxwmf/FdOZ/sh6Q8qvEAiHpm4PJ4a/doJe0gH1t+aHRgCzOvBvJedEK5OFE5jpm4AGP2a8Dxe3gGJ/pAutug9Gp6he92CsSsWBaEcxGx0FHytmIpuqGkOpldqNYQK8cSoXvd+xLxXADw0kf6UkJNFtdo5MOgaLjiQOQHcn+A6h5NuL2s0qsC2LOM75PcF3yr5STuBSAcGG+meA14K/CI21HcS4LBT6tv0QAh8Dr5l93AhZzG5ZJ4VxAqdZUEl9z7WJ4aN+svMvwHHL21UKTd1mqvChH7/Za5xzXBBKrUcB0TQ+Ulgkfbi/H/YT5EptrGzsEK7tR1B7ln9BBwckYfMiuSqklSznIuoIIOM42MQO+QnduCoFCI0bpkzjCjddHPN/F+2Yu+sd9bKNpVwHhbS3LluK/0zgfwD0xYI5dXuzlQAAAABJRU5ErkJggg==');
+  }
+
+  div.callout-warning.callout-style-default .callout-caption {
+    background-color: #fcefdc
+  }
+
+  div.callout-tip {
+    border-left-color: #02b875 !important;
+  }
+
+  div.callout-tip .callout-icon::before {
+    background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAADr0lEQVRYCe1XTWgTQRj9ZjZV8a9SPIkKgj8I1bMHsUWrqYLVg4Ue6v9BwZOxSYsIerFao7UiUryIqJcqgtpimhbBXoSCVxUFe9CTiogUrUp2Pt+3aUI2u5vdNh4dmMzOzHvvezuz8xNFM0mjnbXaNu1MvFWRXkXEyE6aYOYJpdW4IXuA4r0fo8qqSMDBU0v1HJUgVieAXxzCsdE/YJTdFcVIZQNMyhruOMJKXYFoLfIfIvVIMWdsrd+Rpd86ZmyzzjJmLStqRn0v8lzkb4rVIXvnpScOJuAn2ACC65FkPzEdEy4TPWRLJ2h7z4cArXzzaOdKlbOvKKX25Wl00jSnrwVxAg3o4dRxhO13RBSdNvH0xSARv3adTXbBdTf64IWO2vH0LT+cv4GR1DJt+DUItaQogeBX/chhbTBxEiZ6gftlDNXTrvT7co4ub5A6gp9HIcHvzTa46OS5fBeP87Qm0fQkr4FsYgVQ7Qg+ZayaDg9jhg1GkWj8RG6lkeSacrrHgDaxdoBiZPg+NXV/KifMuB6//JmYH4CntVEHy/keA6x4h4CU5oFy8GzrBS18cLJMXcljAKB6INjWsRcuZBWVaS3GDrqB7rdapVIeA+isQ57Eev9eCqzqOa81CY05VLd6SamW2wA2H3SiTbnbSxmzfp7WtKZkqy4mdyAlGx7ennghYf8voqp9cLSgKdqNfa6RdRsAAkPwRuJZNbpByn+RrJi1RXTwdi8RQF6ymDwGMAtZ6TVE+4uoKh+MYkcLsT0Hk8eAienbiGdjJHZTpmNjlbFJNKDVAp2fJlYju6IreQxQ08UJDNYdoLSl6AadO+fFuCQqVMB1NJwPm69T04Wv5WhfcWyfXQB+wXRs1pt+nCknRa0LVzSA/2B+a9+zQJadb7IyyV24YAxKp2Jqs3emZTuNnKxsah+uabKbMk7CbTgJx/zIgQYErIeTKRQ9yD9wxVof5YolPHqaWo7TD6tJlh7jQnK5z2n3+fGdggIOx2kaa2YI9QWarc5Ce1ipNWMKeSG4DysFF52KBmTNMmn5HqCFkwy34rDg05gDwgH3bBi+sgFhN/e8QvRn8kbamCOhgrZ9GJhFDgfcMHzFb6BAtjKpFhzTjwv1KCVuxHvCbsSiEz4CANnj84cwHdFXAbAOJ4LTSAawGWFn5tDhLMYz6nWeU2wJfIhmIJBefcd/A5FWQWGgrWzyORZ3Q6HuV+Jf0Bj+BTX69fm1zWgK7By1YTXchFDORywnfQ7GpzOo6S+qECrsx2ifVQAAAABJRU5ErkJggg==');
+  }
+
+  div.callout-tip.callout-style-default .callout-caption {
+    background-color: #ccf1e3
+  }
+
+  div.callout-caution {
+    border-left-color: #fd7e14 !important;
+  }
+
+  div.callout-caution .callout-icon::before {
+    background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAACV0lEQVRYCdVWzWoUQRCuqp2ICBLJXgITZL1EfQDBW/bkzUMUD7klD+ATSHBEfAIfQO+iXsWDxJsHL96EHAwhgzlkg8nBg25XWb0zIb0zs9muYYWkoKeru+vn664fBqElyZNuyh167NXJ8Ut8McjbmEraKHkd7uAnAFku+VWdb3reSmRV8PKSLfZ0Gjn3a6Xlcq9YGb6tADjn+lUfTXtVmaZ1KwBIvFI11rRXlWlatwIAAv2asaa9mlB9wwygiDX26qaw1yYPzFXg2N1GgG0FMF8Oj+VIx7E/03lHx8UhvYyNZLN7BwSPgekXXLribw7w5/c8EF+DBK5idvDVYtEEwMeYefjjLAdEyQ3M9nfOkgnPTEkYU+sxMq0BxNR6jExrAI31H1rzvLEfRIdgcv1XEdj6QTQAS2wtstEALLG1yEZ3QhH6oDX7ExBSFEkFINXH98NTrme5IOaaA7kIfiu2L8A3qhH9zRbukdCqdsA98TdElyeMe5BI8Rs2xHRIsoTSSVFfCFCWGPn9XHb4cdobRIWABNf0add9jakDjQJpJ1bTXOJXnnRXHRf+dNL1ZV1MBRCXhMbaHqGI1JkKIL7+i8uffuP6wVQAzO7+qVEbF6NbS0LJureYcWXUUhH66nLR5rYmva+2tjRFtojkM2aD76HEGAD3tPtKM309FJg5j/K682ywcWJ3PASCcycH/22u+Bh7Aa0ehM2Fu4z0SAE81HF9RkB21c5bEn4Dzw+/qNOyXr3DCTQDMBOdhi4nAgiFDGCinIa2owCEChUwD8qzd03PG+qdW/4fDzjUMcE1ZpIAAAAASUVORK5CYII=');
+  }
+
+  div.callout-caution.callout-style-default .callout-caption {
+    background-color: #ffe5d0
+  }
+
+  </style>
+<style type="text/css">
+    .reveal div.sourceCode {
+      margin: 0;
+      overflow: auto;
+    }
+    .reveal div.hanging-indent {
+      margin-left: 1em;
+      text-indent: -1em;
+    }
+    .reveal .slide:not(.center) {
+      height: 100%;
+    }
+    .reveal .slide.scrollable {
+      overflow-y: auto;
+    }
+    .reveal .footnotes {
+      height: 100%;
+      overflow-y: auto;
+    }
+    .reveal .slide .absolute {
+      position: absolute;
+      display: block;
+    }
+    .reveal .footnotes ol {
+      counter-reset: ol;
+      list-style-type: none; 
+      margin-left: 0;
+    }
+    .reveal .footnotes ol li:before {
+      counter-increment: ol;
+      content: counter(ol) ". "; 
+    }
+    .reveal .footnotes ol li > p:first-child {
+      display: inline-block;
+    }
+    .reveal .slide ul,
+    .reveal .slide ol {
+      margin-bottom: 0.5em;
+    }
+    .reveal .slide ul li,
+    .reveal .slide ol li {
+      margin-top: 0.4em;
+      margin-bottom: 0.2em;
+    }
+    .reveal .slide ul[role="tablist"] li {
+      margin-bottom: 0;
+    }
+    .reveal .slide ul li > *:first-child,
+    .reveal .slide ol li > *:first-child {
+      margin-block-start: 0;
+    }
+    .reveal .slide ul li > *:last-child,
+    .reveal .slide ol li > *:last-child {
+      margin-block-end: 0;
+    }
+    .reveal .slide .columns:nth-child(3) {
+      margin-block-start: 0.8em;
+    }
+    .reveal blockquote {
+      box-shadow: none;
+    }
+    .reveal .tippy-content>* {
+      margin-top: 0.2em;
+      margin-bottom: 0.7em;
+    }
+    .reveal .tippy-content>*:last-child {
+      margin-bottom: 0.2em;
+    }
+    .reveal .slide > img.stretch.quarto-figure-center,
+    .reveal .slide > img.r-stretch.quarto-figure-center {
+      display: block;
+      margin-left: auto;
+      margin-right: auto; 
+    }
+    .reveal .slide > img.stretch.quarto-figure-left,
+    .reveal .slide > img.r-stretch.quarto-figure-left  {
+      display: block;
+      margin-left: 0;
+      margin-right: auto; 
+    }
+    .reveal .slide > img.stretch.quarto-figure-right,
+    .reveal .slide > img.r-stretch.quarto-figure-right  {
+      display: block;
+      margin-left: auto;
+      margin-right: 0; 
+    }
+  </style>
+</head>
+<body class="quarto-light">
+  <div class="reveal">
+    <div class="slides">
+
+<section id="title-slide" class="quarto-title-block center"><h1 class="title">Independent Study to prepare for workshop</h1>
+  <p class="subtitle">Omics 3: Visualising and Interpreting</p>
+
+<div class="quarto-title-authors">
+<div class="quarto-title-author">
+<div class="quarto-title-author-name">
+Emma Rand 
+</div>
+</div>
+</div>
+
+  <p class="date">23 October, 2023</p>
+</section><section id="overview" class="slide level2"><h2>Overview</h2>
+<p>In these slides we will:</p>
+<div>
+<ul>
+<li class="fragment"><p>Check where you are</p></li>
+<li class="fragment">
+<p>Learn some concepts used in omics visualisation</p>
+<ul>
+<li class="fragment">Principal Component Analysis (PCA)</li>
+<li class="fragment">Volcano plots</li>
+<li class="fragment">Heatmaps</li>
+</ul>
+</li>
+<li class="fragment"><p>Find out what packages to install before the workshop</p></li>
+</ul>
+</div>
+</section><section><section id="where-should-you-be" class="title-slide slide level1 center"><h1>Where should you be?</h1>
+
+</section><section id="what-we-did-in-omics-2-statistical-analysis" class="slide level2"><h2>What we did in Omics 2: Statistical Analysis</h2>
+<div>
+<ul>
+<li class="fragment"><p>carried out differential expression analysis</p></li>
+<li class="fragment"><p>found genes not expressed at all, or expressed in one group only</p></li>
+<li class="fragment"><p>Saved results files</p></li>
+</ul>
+</div>
+</section><section id="where-should-you-be-1" class="slide level2"><h2>Where should you be?</h2>
+<p>After the <a href="../week-4/workshop.html">Omics 2: 👋 Statistical Analysis Workshop</a> including:</p>
+<ul>
+<li><p><a href="../week-4/workshop.html#look-after-future-you">🤗 Look after future you!</a> and</p></li>
+<li><p>the <a href="../week-4/study_after_workshop.html">Independent Study to consolidate</a>, you should have:</p></li>
+</ul></section><section id="frogs" class="slide level2"><h2>🐸 Frogs</h2>
+<div style="font-size: 70%;">
+<ul>
+<li>An RStudio Project called <code>frogs-88H</code> which contains:
+<ul>
+<li>Raw data (S14, S20 and S30)</li>
+<li>Processed data (<code>s30_filtered.csv</code>, <code>s30_summary_gene.csv</code>, <code>s30_summary_gene_filtered.csv</code>, <code>s30_summary_samp.csv</code> and equivalents for S14 <em>OR</em> S20)</li>
+<li>Results files (<code>s30_fgf_only.csv</code>, <code>S30_normalised_counts.csv</code>, <code>S30_results.csv</code> and equivalents for S14 <em>OR</em> S20)<br>
+</li>
+<li>Two scripts called <code>cont-fgf-s30.R</code> and either <code>cont-fgf-s20.R</code> <em>OR</em> <code>cont-fgf-s14.R</code>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<p>Files should be organised into folders. Code should be well commented and easy to read.</p>
+</section><section id="mice" class="slide level2"><h2>🐭 Mice</h2>
+<div style="font-size: 70%;">
+<ul>
+<li>An RStudio Project called <code>mice-88H</code> which contains
+<ul>
+<li>Raw data (hspc, prog, lthsc)</li>
+<li>Processed data (<code>hspc_summary_gene.csv</code>, <code>hspc_summary_samp.csv</code>, <code>prog_summary_gene.csv</code>, <code>prog_summary_samp.csv</code>, <code>lthsc_summary_gene.csv</code>, <code>lthsc_summary_samp.csv</code>)</li>
+</ul>
+</li>
+<li>Results files (<code>prog_hspc_results.csv</code> and an equivalent for lthsc vs prog or hspc vs lthsc)</li>
+<li>Two scripts called <code>hspc-prog.R</code> and either <code>hspc-lthsc.R</code> <em>OR</em> <code>prog-lthsc.R</code>
+</li>
+</ul>
+</div>
+<p>Files should be organised into folders. Code should be well commented and easy to read.</p>
+</section><section id="section" class="slide level2"><h2>🍂</h2>
+<p>Either of the other examples.</p>
+</section><section id="if-you-do-not-have-those" class="slide level2"><h2>If you do not have those</h2>
+<p>Go through:</p>
+<ul>
+<li><p><a href="../week-4/workshop.html">Omics 2: Statistical Analysis</a> including:</p></li>
+<li><p><a href="../week-4/workshop.html#look-after-future-you">🤗 Look after future you!</a> and</p></li>
+<li><p>the <a href="../week-4/study_after_workshop.html">Independent Study to consolidate</a></p></li>
+</ul></section></section><section><section id="examine-the-results-files" class="title-slide slide level1 center"><h1>Examine the results files</h1>
+
+</section><section id="examine-the-results-files-1" class="slide level2"><h2>Examine the results files</h2>
+<p>Remind yourself of the key columns you have in the results files:</p>
+<ul>
+<li>a log<sub>2</sub> fold change</li>
+<li>an unadjusted <em>p</em>-value</li>
+<li>a <em>p</em> value adjusted for multiple testing (<code>FDR</code> or <code>padj</code>)</li>
+<li>a gene id</li>
+</ul></section><section id="frogs-1" class="slide level2"><h2>🐸 Frogs</h2>
+<div class="cell">
+<div class="cell-output cell-output-stdout">
+<pre><code>Rows: 10,136
+Columns: 7
+$ baseMean        &lt;dbl&gt; 237.553928, 531.565700, 86.392830, 49.813502, 419.9983…
+$ log2FoldChange  &lt;dbl&gt; 0.096601855, -0.089588528, -0.192811203, -0.008858703,…
+$ lfcSE           &lt;dbl&gt; 0.2079396, 0.1557384, 0.3253216, 0.4342614, 0.1685420,…
+$ stat            &lt;dbl&gt; 0.46456683, -0.57525007, -0.59267874, -0.02039947, -0.…
+$ pvalue          &lt;dbl&gt; 0.64224169, 0.56512218, 0.55339617, 0.98372471, 0.8699…
+$ padj            &lt;dbl&gt; 0.9998970, 0.9998970, 0.9998970, 0.9998970, 0.9998970,…
+$ xenbase_gene_id &lt;chr&gt; "XB-GENE-1000007", "XB-GENE-1000023", "XB-GENE-1000062…</code></pre>
+</div>
+</div>
+<div class="fragment">
+<ul>
+<li>
+<code>baseMean</code> is the mean of the normalised counts for the gene across all samples</li>
+<li>
+<code>lfcSE</code> is the standard error of the log<sub>2</sub> fold change</li>
+<li>
+<code>stat</code> is the test statistic (the Wald statistic)</li>
+<li>Generated by <strong><code>DESeq2</code></strong> <span class="citation" data-cites="DESeq2">(<a href="#/references" role="doc-biblioref" onclick="">Love, Huber, and Anders 2014</a>)</span>
+</li>
+</ul>
+</div>
+</section><section id="mice-1" class="slide level2"><h2>🐭 Mice</h2>
+<div class="cell">
+<div class="cell-output cell-output-stdout">
+<pre><code>Rows: 280
+Columns: 6
+$ Top             &lt;dbl&gt; 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
+$ p.value         &lt;dbl&gt; 7.038138e-117, 4.736622e-90, 1.832630e-88, 4.211954e-7…
+$ FDR             &lt;dbl&gt; 1.970679e-114, 6.631271e-88, 1.710455e-86, 2.948368e-7…
+$ summary.logFC   &lt;dbl&gt; 1.596910, 3.035165, 3.261056, -2.146491, -3.056730, 3.…
+$ logFC.hspc      &lt;dbl&gt; 1.596910, 3.035165, 3.261056, -2.146491, -3.056730, 3.…
+$ ensembl_gene_id &lt;chr&gt; "ENSMUSG00000028639", "ENSMUSG00000024053", "ENSMUSG00…</code></pre>
+</div>
+</div>
+<div class="fragment">
+<ul>
+<li><code>Top</code> is the rank of the gene ordered by the <em>p</em>-value (smallest first)</li>
+<li>
+<code>summary.logFC</code> and <code>logFC.hspc</code> give the same value (in this case since comparing two cell types)</li>
+<li>Generated by <strong><code>scran</code></strong> <span class="citation" data-cites="scran">(<a href="#/references" role="doc-biblioref" onclick="">Lun, McCarthy, and Marioni 2016</a>)</span>
+</li>
+</ul>
+</div>
+</section></section><section><section id="adding-gene-information" class="title-slide slide level1 center"><h1>Adding gene information</h1>
+
+</section><section id="adding-gene-information-1" class="slide level2"><h2>Adding gene information</h2>
+<div>
+<ul>
+<li class="fragment"><p>The gene id is difficult to interpret in plots/tables</p></li>
+<li class="fragment"><p>Therefore we need to add information such as the gene name and a description to the results</p></li>
+<li class="fragment"><p>For the 🐸 Frog data information comes from Xenbase <span class="citation" data-cites="fisher2023">(<a href="#/references" role="doc-biblioref" onclick="">Fisher et al. 2023</a>)</span></p></li>
+<li class="fragment"><p>For the 🐭 Mice data information comes from Ensembl <span class="citation" data-cites="birney2004">(<a href="#/references" role="doc-biblioref" onclick="">Birney et al. 2004</a>)</span></p></li>
+</ul>
+</div>
+</section><section id="xenbase" class="slide level2"><h2>🐸 Xenbase</h2>
+
+<img data-src="images/Xenbase-Logo-Medium.png" width="800" class="r-stretch quarto-figure-center"><p class="caption">xenbase logo</p><p><a href="http://www.xenbase.org/">Xenbase</a> is a model organism database that provides genomic, molecular, and developmental biology information about <em>Xenopus laevis</em> and <em>Xenopus tropicalis</em>.</p>
+<div class="fragment">
+<p>It took me some time to find the information you need.</p>
+</div>
+</section><section id="xenbase-1" class="slide level2"><h2>🐸 Xenbase</h2>
+<div>
+<ul>
+<li class="fragment"><p>I got the information from the <a href="https://www.xenbase.org/xenbase/static-xenbase/ftpDatafiles.jsp">Xenbase information pages</a> under Data Reports | Gene Information</p></li>
+<li class="fragment"><p>This is listed: Xenbase Gene Product Information [readme] <a href="https://download.xenbase.org/xenbase/GenePageReports/xenbase.gpi.gz">gzipped gpi (tab separated)</a></p></li>
+<li class="fragment"><p>Click on the readme link to see the file format and columns</p></li>
+<li class="fragment"><p>I downloaded <a href="https://download.xenbase.org/xenbase/GenePageReports/xenbase.gpi.gz">xenbase.gpi.gz</a>, unzipped it, removed header lines and the <em>Xenopus tropicalis</em> (taxon:8364) entries and saved it as <a href="meta/xenbase_info.xlsx">xenbase_info.xlsx</a></p></li>
+<li class="fragment"><p>In the workshop you will import this file and merge the information with the results file</p></li>
+</ul>
+</div>
+</section><section id="ensembl" class="slide level2"><h2>🐭 Ensembl</h2>
+<div>
+<ul>
+<li class="fragment"><p><a href="https://www.ensembl.org/index.html">Ensembl</a> creates, integrates and distributes reference datasets and analysis tools that enable genomics</p></li>
+<li class="fragment"><p><a href="https://grch37.ensembl.org/info/data/biomart/index.html">BioMart</a> provides access to these large datasets</p></li>
+<li class="fragment"><p><strong><code>biomaRt</code></strong> <span class="citation" data-cites="biomaRt">(<a href="#/references" role="doc-biblioref" onclick="">Durinck et al. 2009</a>)</span> is a Bioconductor package that gives you programmatic access to BioMart.</p></li>
+<li class="fragment"><p>In the workshop you use this package to get information you can merge with the results file</p></li>
+</ul>
+</div>
+</section></section><section><section id="plots" class="title-slide slide level1 center"><h1>Plots</h1>
+
+</section><section id="what-is-the-purpose-of-an-omics-plot" class="slide level2"><h2>What is the purpose of an Omics plot?</h2>
+<div>
+<ul>
+<li class="fragment"><p>In general, we plot data to help us summarise and understand it</p></li>
+<li class="fragment"><p>This is especially important for omics data where we have a very large number of variables and often a large number of observations</p></li>
+<li class="fragment"><p>We will look at three plots very commonly used in omics analysis: Principal Component Analysis (PCA) plot, Heatmaps and Volcano Plots</p></li>
+</ul>
+</div>
+</section></section><section><section id="principal-component-analysis-pca" class="title-slide slide level1 center"><h1>Principal Component Analysis (PCA)</h1>
+
+</section><section id="pca" class="slide level2"><h2>PCA</h2>
+<div>
+<ul>
+<li class="fragment"><p>Principal Component Analysis is an unsupervised machine learning technique</p></li>
+<li class="fragment"><p>Unsupervised methods<sup>1</sup> are unsupervised in that they do not use/optimise to a particular output. The goal is to uncover structure. They do not test hypotheses</p></li>
+<li class="fragment"><p>It is often used to visualise high dimensional data because it is a dimension reduction technique</p></li>
+</ul>
+</div>
+<aside><ol class="aside-footnotes"><li id="fn1"><p>You may wish to read a previous introduction to unsupervised methods I have written <a href="https://3mmarand.github.io/BIO00058M-Data-science-2020/slides/05_intro_to_ML_unsupervised.html#1">An introduction to Machine Learning: Unsupervised methods</a> <span class="citation" data-cites="rand2021">(<a href="#/references" role="doc-biblioref" onclick="">Rand 2021</a>)</span></p></li></ol></aside></section><section id="pca-1" class="slide level2"><h2>PCA</h2>
+<div>
+<ul>
+<li class="fragment"><p>It takes a large number of continuous variables (like gene expression) and reduces them to a smaller number of variables (called principal components) that explain most of the variation in the data</p></li>
+<li class="fragment"><p>The principal components can be plotted to see how samples cluster together</p></li>
+</ul>
+</div>
+</section><section id="pca-2" class="slide level2"><h2>PCA</h2>
+<ul>
+<li>To see if samples cluster as we would expect, we might plot the expression of one gene against another</li>
+</ul>
+<div class="quarto-layout-panel">
+<div class="quarto-layout-row quarto-layout-valign-top">
+<div class="quarto-layout-cell" style="flex-basis: 50.0%;justify-content: center;">
+<div class="quarto-figure quarto-figure-center">
+<figure><p><img data-src="images/why_pca_frog.png" width="300"></p>
+<p></p><figcaption>Samples</figcaption><p></p>
+</figure>
+</div>
+</div>
+<div class="quarto-layout-cell" style="flex-basis: 50.0%;justify-content: center;">
+<div class="quarto-figure quarto-figure-center">
+<figure><p><img data-src="images/why_pca_mouse.png" width="300"></p>
+<p></p><figcaption>Cells</figcaption><p></p>
+</figure>
+</div>
+</div>
+</div>
+</div>
+<p>This gives some insight but we have 280 (mice) or 10,000+ (frogs) genes to consider. How do we know if the pair we use is typical? How can we consider all the genes at once?</p>
+</section><section id="pca-3" class="slide level2"><h2>PCA</h2>
+<ul>
+<li>PCA is a solution for this - It takes a large number of continuous variables (like gene expression) and reduces them to a smaller number of “principal components” that explain most of the variation in the data.</li>
+</ul>
+<div class="quarto-layout-panel">
+<div class="quarto-layout-row quarto-layout-valign-top">
+<div class="quarto-layout-cell" style="flex-basis: 50.0%;justify-content: center;">
+<div class="quarto-figure quarto-figure-center">
+<figure><p><img data-src="figures/frog-s30-pca.png" width="300"></p>
+<p></p><figcaption>Samples</figcaption><p></p>
+</figure>
+</div>
+</div>
+<div class="quarto-layout-cell" style="flex-basis: 50.0%;justify-content: center;">
+<div class="quarto-figure quarto-figure-center">
+<figure><p><img data-src="figures/prog_hspc-pca.png" width="300"></p>
+<p></p><figcaption>Cells</figcaption><p></p>
+</figure>
+</div>
+</div>
+</div>
+</div>
+<p>We have done PCA in Omics 3, but often PCA might be one of the first exploratory steps because it gives you an idea of whether to expect general patterns in gene expression that distinguish groups.</p>
+</section></section><section><section id="heatmaps" class="title-slide slide level1 center"><h1>Heatmaps</h1>
+
+</section><section id="heatmaps-1" class="slide level2"><h2>Heatmaps</h2>
+<div>
+<ul>
+<li class="fragment"><p>are a grid of genes on one axis and samples on the other with each grid cell coloured by another variable</p></li>
+<li class="fragment"><p>in this case the other variable is gene expression</p></li>
+<li class="fragment"><p>they allow you to quickly get an overview of the expression patterns across genes and samples</p></li>
+<li class="fragment"><p>we often couple them with clustering to group genes and samples with similar expression patterns together which helps us see which genes are responsible for distinguishing groups</p></li>
+</ul>
+</div>
+</section><section id="section-1" class="slide level2"><h2></h2>
+<div class="quarto-figure quarto-figure-center">
+<figure><p><img data-src="images/frog-heat.png" height="800"></p>
+<p></p><figcaption>Heat map for the frog data</figcaption><p></p>
+</figure>
+</div>
+<p>See next slide for information</p>
+</section><section id="heatmaps-2" class="slide level2"><h2>Heatmaps</h2>
+<div>
+<ul>
+<li class="fragment"><p>On the vertical axis are genes which are differentially expressed at the 0.01 level</p></li>
+<li class="fragment"><p>On the horizontal axis are samples</p></li>
+<li class="fragment"><p>We can see that the FGF-treated samples cluster together and the control samples cluster together</p></li>
+<li class="fragment"><p>We can also see two clusters of genes; one of these shows genes upregulated (more yellow) in the FGF-treated samples and the other shows genes downregulated (more blue) in the FGF-treated samples</p></li>
+</ul>
+</div>
+</section></section><section><section id="volcano-plots" class="title-slide slide level1 center"><h1>Volcano plots</h1>
+
+</section><section id="volcano-plots-1" class="slide level2"><h2>Volcano plots</h2>
+<div>
+<ul>
+<li class="fragment"><p>Volcano plots are often used to visualise the results of differential expression analysis</p></li>
+<li class="fragment"><p>They are just a scatter of the corrected p value against the fold change….</p></li>
+<li class="fragment"><p>almost - we actually plot the negative log of the corrected p value against the fold change</p></li>
+</ul>
+</div>
+</section><section id="volcano-plots-2" class="slide level2"><h2>Volcano plots</h2>
+<div style="font-size: 70%;">
+<ul>
+<li><p>This is because just plotting the <em>p</em>-value means the axis is counterintuitive. Small <em>p</em>-values (i.e., significant values) are at the bottom of the axis</p></li>
+<li><p>And since <em>p</em>-values range from 1 to very tiny the points are all squashed at the bottom of the axis</p></li>
+</ul>
+</div>
+
+<img data-src="images/volcano-why.png" class="r-stretch quarto-figure-center"><p class="caption">Volcano plot FDR against fold change</p></section><section id="volcano-plots-3" class="slide level2"><h2>Volcano plots</h2>
+<div style="font-size: 70%;">
+<ul>
+<li>Plotting the negative log of the corrected <em>p</em>-value means that the values are spread out and the significant values are at the top of the axis</li>
+</ul>
+</div>
+
+<img data-src="figures/prog-hspc-volcano.png" class="r-stretch quarto-figure-center"><p class="caption">Volcano plot -log(FDR) against fold change</p></section><section id="visualisations" class="slide level2"><h2>Visualisations</h2>
+<ul>
+<li><p>Should be done on normalised data so meaningful comparisons can be made</p></li>
+<li><p>The 🐭 mouse data were already log<sub>2</sub> normalised</p></li>
+<li><p>The 🐸 frog data were normalised by the DE method and saved to file. We will log<sub>2</sub> transform before doing visualisations</p></li>
+</ul></section><section id="packages-to-install-before-the-workshop" class="slide level2"><h2>Packages to install before the workshop</h2>
+<p><strong><code>heatmaply</code></strong> and <strong><code>ggrepel</code></strong> from CRAN in the normal way:</p>
+<div class="cell">
+<div class="sourceCode" id="cb3"><pre class="downlit sourceCode r code-with-copy"><code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html">install.packages</a></span><span class="op">(</span><span class="st">"heatmaply"</span><span class="op">)</span></span>
+<span><span class="fu"><a href="https://rdrr.io/r/utils/install.packages.html">install.packages</a></span><span class="op">(</span><span class="st">"ggrepel"</span><span class="op">)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+<p><strong><code>biomaRt</code></strong> from Bioconductor using <strong><code>BiocManager</code></strong>:</p>
+<div class="cell">
+<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r code-with-copy"><code class="sourceCode R"><span><span class="fu">BiocManager</span><span class="fu">::</span><span class="fu"><a href="https://bioconductor.github.io/BiocManager/reference/install.html">install</a></span><span class="op">(</span><span class="st">"biomaRt"</span><span class="op">)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</div>
+</section></section><section><section id="workshops" class="title-slide slide level1 center"><h1>Workshops</h1>
+
+</section><section id="workshops-1" class="slide level2"><h2>Workshops</h2>
+<ul>
+<li><p>Omics 1: Hello data Getting to know the data. Checking the distributions of values</p></li>
+<li><p>Omics 2: Statistical Analysis Identifying which genes are differentially expressed between treatments.</p></li>
+<li><p>Omics 3: Visualising and Interpreting. PCA, Volcano plots and heatmaps to visualise results. Interpreting the results and finding out more about genes of interest.</p></li>
+</ul></section><section id="references" class="slide level2 smaller scrollable"><h2>References</h2>
+
+<div class="footer footer-default">
+<p>🔗 <a href="https://3mmarand.github.io/BIO00088H-data/omics/week-5/overview.html">About Omics 3: Visualising and Interpreting</a></p>
+</div>
+<div id="refs" class="references csl-bib-body hanging-indent" role="doc-bibliography">
+<div id="ref-birney2004" class="csl-entry" role="doc-biblioentry">
+Birney, Ewan, T. Daniel Andrews, Paul Bevan, Mario Caccamo, Yuan Chen, Laura Clarke, Guy Coates, et al. 2004. <span>“An Overview of Ensembl.”</span> <em>Genome Research</em> 14 (5): 925–28. <a href="https://doi.org/10.1101/gr.1860604">https://doi.org/10.1101/gr.1860604</a>.
+</div>
+<div id="ref-biomaRt" class="csl-entry" role="doc-biblioentry">
+Durinck, Steffen, Paul T. Spellman, Ewan Birney, and Wolfgang Huber. 2009. <span>“Mapping Identifiers for the Integration of Genomic Datasets with the r/Bioconductor Package biomaRt”</span> 4.
+</div>
+<div id="ref-fisher2023" class="csl-entry" role="doc-biblioentry">
+Fisher, Malcolm, Christina James-Zorn, Virgilio Ponferrada, Andrew J Bell, Nivitha Sundararaj, Erik Segerdell, Praneet Chaturvedi, et al. 2023. <span>“Xenbase: Key Features and Resources of the Xenopus Model Organism Knowledgebase.”</span> <em>Genetics</em> 224 (1): iyad018. <a href="https://doi.org/10.1093/genetics/iyad018">https://doi.org/10.1093/genetics/iyad018</a>.
+</div>
+<div id="ref-DESeq2" class="csl-entry" role="doc-biblioentry">
+Love, Michael I., Wolfgang Huber, and Simon Anders. 2014. <span>“Moderated Estimation of Fold Change and Dispersion for RNA-Seq Data with DESeq2”</span> 15: 550. <a href="https://doi.org/10.1186/s13059-014-0550-8">https://doi.org/10.1186/s13059-014-0550-8</a>.
+</div>
+<div id="ref-scran" class="csl-entry" role="doc-biblioentry">
+Lun, Aaron T. L., Davis J. McCarthy, and John C. Marioni. 2016. <span>“A Step-by-Step Workflow for Low-Level Analysis of Single-Cell RNA-Seq Data with Bioconductor”</span> 5: 2122. <a href="https://doi.org/10.12688/f1000research.9501.2">https://doi.org/10.12688/f1000research.9501.2</a>.
+</div>
+<div id="ref-rand2021" class="csl-entry" role="doc-biblioentry">
+Rand, Emma. 2021. <em>Data Science Strand of BIO00058M</em>. <a href="https://doi.org/10.5281/zenodo.5527705">https://doi.org/10.5281/zenodo.5527705</a>.
+</div>
+</div>
+</section></section>
+</div>
+  </div>
+
+  <script>window.backupDefine = window.define; window.define = undefined;</script><script src="../../site_libs/revealjs/dist/reveal.js"></script><!-- reveal.js plugins --><script src="../../site_libs/revealjs/plugin/quarto-line-highlight/line-highlight.js"></script><script src="../../site_libs/revealjs/plugin/pdf-export/pdfexport.js"></script><script src="../../site_libs/revealjs/plugin/reveal-menu/menu.js"></script><script src="../../site_libs/revealjs/plugin/reveal-menu/quarto-menu.js"></script><script src="../../site_libs/revealjs/plugin/reveal-chalkboard/plugin.js"></script><script src="../../site_libs/revealjs/plugin/quarto-support/support.js"></script><script src="../../site_libs/revealjs/plugin/notes/notes.js"></script><script src="../../site_libs/revealjs/plugin/search/search.js"></script><script src="../../site_libs/revealjs/plugin/zoom/zoom.js"></script><script src="../../site_libs/revealjs/plugin/math/math.js"></script><script>window.define = window.backupDefine; window.backupDefine = undefined;</script><script>
+
+      // Full list of configuration options available at:
+      // https://revealjs.com/config/
+      Reveal.initialize({
+'controlsAuto': true,
+'previewLinksAuto': false,
+'smaller': false,
+'pdfSeparateFragments': false,
+'autoAnimateEasing': "ease",
+'autoAnimateDuration': 1,
+'autoAnimateUnmatched': true,
+'menu': {"side":"left","useTextContentForMissingTitles":true,"markers":false,"loadIcons":false,"custom":[{"title":"Tools","icon":"<i class=\"fas fa-gear\"></i>","content":"<ul class=\"slide-menu-items\">\n<li class=\"slide-tool-item active\" data-item=\"0\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.fullscreen(event)\"><kbd>f</kbd> Fullscreen</a></li>\n<li class=\"slide-tool-item\" data-item=\"1\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.speakerMode(event)\"><kbd>s</kbd> Speaker View</a></li>\n<li class=\"slide-tool-item\" data-item=\"2\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.overview(event)\"><kbd>o</kbd> Slide Overview</a></li>\n<li class=\"slide-tool-item\" data-item=\"3\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.overview(event)\"><kbd>e</kbd> PDF Export Mode</a></li>\n<li class=\"slide-tool-item\" data-item=\"4\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.toggleChalkboard(event)\"><kbd>b</kbd> Toggle Chalkboard</a></li>\n<li class=\"slide-tool-item\" data-item=\"5\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.toggleNotesCanvas(event)\"><kbd>c</kbd> Toggle Notes Canvas</a></li>\n<li class=\"slide-tool-item\" data-item=\"6\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.downloadDrawings(event)\"><kbd>d</kbd> Download Drawings</a></li>\n<li class=\"slide-tool-item\" data-item=\"7\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.keyboardHelp(event)\"><kbd>?</kbd> Keyboard Help</a></li>\n</ul>"}],"openButton":true},
+'chalkboard': {"buttons":true},
+'smaller': false,
+ 
+        // Display controls in the bottom right corner
+        controls: false,
+
+        // Help the user learn the controls by providing hints, for example by
+        // bouncing the down arrow when they first encounter a vertical slide
+        controlsTutorial: false,
+
+        // Determines where controls appear, "edges" or "bottom-right"
+        controlsLayout: 'edges',
+
+        // Visibility rule for backwards navigation arrows; "faded", "hidden"
+        // or "visible"
+        controlsBackArrows: 'faded',
+
+        // Display a presentation progress bar
+        progress: true,
+
+        // Display the page number of the current slide
+        slideNumber: 'c/t',
+
+        // 'all', 'print', or 'speaker'
+        showSlideNumber: 'all',
+
+        // Add the current slide number to the URL hash so that reloading the
+        // page/copying the URL will return you to the same slide
+        hash: true,
+
+        // Start with 1 for the hash rather than 0
+        hashOneBasedIndex: false,
+
+        // Flags if we should monitor the hash and change slides accordingly
+        respondToHashChanges: true,
+
+        // Push each slide change to the browser history
+        history: true,
+
+        // Enable keyboard shortcuts for navigation
+        keyboard: true,
+
+        // Enable the slide overview mode
+        overview: true,
+
+        // Disables the default reveal.js slide layout (scaling and centering)
+        // so that you can use custom CSS layout
+        disableLayout: false,
+
+        // Vertical centering of slides
+        center: false,
+
+        // Enables touch navigation on devices with touch input
+        touch: true,
+
+        // Loop the presentation
+        loop: false,
+
+        // Change the presentation direction to be RTL
+        rtl: false,
+
+        // see https://revealjs.com/vertical-slides/#navigation-mode
+        navigationMode: 'linear',
+
+        // Randomizes the order of slides each time the presentation loads
+        shuffle: false,
+
+        // Turns fragments on and off globally
+        fragments: true,
+
+        // Flags whether to include the current fragment in the URL,
+        // so that reloading brings you to the same fragment position
+        fragmentInURL: false,
+
+        // Flags if the presentation is running in an embedded mode,
+        // i.e. contained within a limited portion of the screen
+        embedded: false,
+
+        // Flags if we should show a help overlay when the questionmark
+        // key is pressed
+        help: true,
+
+        // Flags if it should be possible to pause the presentation (blackout)
+        pause: true,
+
+        // Flags if speaker notes should be visible to all viewers
+        showNotes: false,
+
+        // Global override for autoplaying embedded media (null/true/false)
+        autoPlayMedia: null,
+
+        // Global override for preloading lazy-loaded iframes (null/true/false)
+        preloadIframes: null,
+
+        // Number of milliseconds between automatically proceeding to the
+        // next slide, disabled when set to 0, this value can be overwritten
+        // by using a data-autoslide attribute on your slides
+        autoSlide: 0,
+
+        // Stop auto-sliding after user input
+        autoSlideStoppable: true,
+
+        // Use this method for navigation when auto-sliding
+        autoSlideMethod: null,
+
+        // Specify the average time in seconds that you think you will spend
+        // presenting each slide. This is used to show a pacing timer in the
+        // speaker view
+        defaultTiming: null,
+
+        // Enable slide navigation via mouse wheel
+        mouseWheel: false,
+
+        // The display mode that will be used to show slides
+        display: 'block',
+
+        // Hide cursor if inactive
+        hideInactiveCursor: true,
+
+        // Time before the cursor is hidden (in ms)
+        hideCursorTime: 5000,
+
+        // Opens links in an iframe preview overlay
+        previewLinks: false,
+
+        // Transition style (none/fade/slide/convex/concave/zoom)
+        transition: 'none',
+
+        // Transition speed (default/fast/slow)
+        transitionSpeed: 'default',
+
+        // Transition style for full page slide backgrounds
+        // (none/fade/slide/convex/concave/zoom)
+        backgroundTransition: 'none',
+
+        // Number of slides away from the current that are visible
+        viewDistance: 3,
+
+        // Number of slides away from the current that are visible on mobile
+        // devices. It is advisable to set this to a lower number than
+        // viewDistance in order to save resources.
+        mobileViewDistance: 2,
+
+        // The "normal" size of the presentation, aspect ratio will be preserved
+        // when the presentation is scaled to fit different resolutions. Can be
+        // specified using percentage units.
+        width: 1050,
+
+        height: 700,
+
+        // Factor of the display size that should remain empty around the content
+        margin: 0.1,
+
+        math: {
+          mathjax: 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js',
+          config: 'TeX-AMS_HTML-full',
+          tex2jax: {
+            inlineMath: [['\\(','\\)']],
+            displayMath: [['\\[','\\]']],
+            balanceBraces: true,
+            processEscapes: false,
+            processRefs: true,
+            processEnvironments: true,
+            preview: 'TeX',
+            skipTags: ['script','noscript','style','textarea','pre','code'],
+            ignoreClass: 'tex2jax_ignore',
+            processClass: 'tex2jax_process'
+          },
+        },
+
+        // reveal.js plugins
+        plugins: [QuartoLineHighlight, PdfExport, RevealMenu, RevealChalkboard, QuartoSupport,
+
+          RevealMath,
+          RevealNotes,
+          RevealSearch,
+          RevealZoom
+        ]
+      });
+    </script><script>
+      // htmlwidgets need to know to resize themselves when slides are shown/hidden.
+      // Fire the "slideenter" event (handled by htmlwidgets.js) when the current
+      // slide changes (different for each slide format).
+      (function () {
+        // dispatch for htmlwidgets
+        function fireSlideEnter() {
+          const event = window.document.createEvent("Event");
+          event.initEvent("slideenter", true, true);
+          window.document.dispatchEvent(event);
+        }
+
+        function fireSlideChanged(previousSlide, currentSlide) {
+          fireSlideEnter();
+
+          // dispatch for shiny
+          if (window.jQuery) {
+            if (previousSlide) {
+              window.jQuery(previousSlide).trigger("hidden");
+            }
+            if (currentSlide) {
+              window.jQuery(currentSlide).trigger("shown");
+            }
+          }
+        }
+
+        // hookup for slidy
+        if (window.w3c_slidy) {
+          window.w3c_slidy.add_observer(function (slide_num) {
+            // slide_num starts at position 1
+            fireSlideChanged(null, w3c_slidy.slides[slide_num - 1]);
+          });
+        }
+
+      })();
+    </script><script id="quarto-html-after-body" type="application/javascript">
+    window.document.addEventListener("DOMContentLoaded", function (event) {
+      const toggleBodyColorMode = (bsSheetEl) => {
+        const mode = bsSheetEl.getAttribute("data-mode");
+        const bodyEl = window.document.querySelector("body");
+        if (mode === "dark") {
+          bodyEl.classList.add("quarto-dark");
+          bodyEl.classList.remove("quarto-light");
+        } else {
+          bodyEl.classList.add("quarto-light");
+          bodyEl.classList.remove("quarto-dark");
+        }
+      }
+      const toggleBodyColorPrimary = () => {
+        const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
+        if (bsSheetEl) {
+          toggleBodyColorMode(bsSheetEl);
+        }
+      }
+      toggleBodyColorPrimary();  
+      const tabsets =  window.document.querySelectorAll(".panel-tabset-tabby")
+      tabsets.forEach(function(tabset) {
+        const tabby = new Tabby('#' + tabset.id);
+      });
+      const clipboard = new window.ClipboardJS('.code-copy-button', {
+        target: function(trigger) {
+          return trigger.previousElementSibling;
+        }
+      });
+      clipboard.on('success', function(e) {
+        // button target
+        const button = e.trigger;
+        // don't keep focus
+        button.blur();
+        // flash "checked"
+        button.classList.add('code-copy-button-checked');
+        var currentTitle = button.getAttribute("title");
+        button.setAttribute("title", "Copied!");
+        let tooltip;
+        if (window.bootstrap) {
+          button.setAttribute("data-bs-toggle", "tooltip");
+          button.setAttribute("data-bs-placement", "left");
+          button.setAttribute("data-bs-title", "Copied!");
+          tooltip = new bootstrap.Tooltip(button, 
+            { trigger: "manual", 
+              customClass: "code-copy-button-tooltip",
+              offset: [0, -8]});
+          tooltip.show();    
+        }
+        setTimeout(function() {
+          if (tooltip) {
+            tooltip.hide();
+            button.removeAttribute("data-bs-title");
+            button.removeAttribute("data-bs-toggle");
+            button.removeAttribute("data-bs-placement");
+          }
+          button.setAttribute("title", currentTitle);
+          button.classList.remove('code-copy-button-checked');
+        }, 1000);
+        // clear code selection
+        e.clearSelection();
+      });
+      function tippyHover(el, contentFn) {
+        const config = {
+          allowHTML: true,
+          content: contentFn,
+          maxWidth: 500,
+          delay: 100,
+          arrow: false,
+          appendTo: function(el) {
+              return el.closest('section.slide') || el.parentElement;
+          },
+          interactive: true,
+          interactiveBorder: 10,
+          theme: 'quarto-reveal',
+          placement: 'bottom-start'
+        };
+          config['offset'] = [0,0];
+          config['maxWidth'] = 700;
+        window.tippy(el, config); 
+      }
+      const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
+      for (var i=0; i<noterefs.length; i++) {
+        const ref = noterefs[i];
+        tippyHover(ref, function() {
+          // use id or data attribute instead here
+          let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
+          try { href = new URL(href).hash; } catch {}
+          const id = href.replace(/^#\/?/, "");
+          const note = window.document.getElementById(id);
+          return note.innerHTML;
+        });
+      }
+      const findCites = (el) => {
+        const parentEl = el.parentElement;
+        if (parentEl) {
+          const cites = parentEl.dataset.cites;
+          if (cites) {
+            return {
+              el,
+              cites: cites.split(' ')
+            };
+          } else {
+            return findCites(el.parentElement)
+          }
+        } else {
+          return undefined;
+        }
+      };
+      var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
+      for (var i=0; i<bibliorefs.length; i++) {
+        const ref = bibliorefs[i];
+        const citeInfo = findCites(ref);
+        if (citeInfo) {
+          tippyHover(citeInfo.el, function() {
+            var popup = window.document.createElement('div');
+            citeInfo.cites.forEach(function(cite) {
+              var citeDiv = window.document.createElement('div');
+              citeDiv.classList.add('hanging-indent');
+              citeDiv.classList.add('csl-entry');
+              var biblioDiv = window.document.getElementById('ref-' + cite);
+              if (biblioDiv) {
+                citeDiv.innerHTML = biblioDiv.innerHTML;
+              }
+              popup.appendChild(citeDiv);
+            });
+            return popup.innerHTML;
+          });
+        }
+      }
+    });
+    </script>
+
+
+</body></html>
\ No newline at end of file
diff --git a/_site/search.json b/_site/search.json
index 20eaa12..41513ad 100644
--- a/_site/search.json
+++ b/_site/search.json
@@ -1417,5 +1417,250 @@
       "Week 4: Statistical Analysis",
       "Prepare!"
     ]
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#overview",
+    "href": "omics/week-5/study_before_workshop.html#overview",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Overview",
+    "text": "Overview\nIn these slides we will:\n\n\nCheck where you are\n\nlearn some concepts used omics visualisation\n\nPrinciple Component Analysis (PCA)\nVolcano plots\nHeatmaps\n\n\nFind out what packages to install before the workshop"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#what-we-did-in-omics-2-statistical-analysis",
+    "href": "omics/week-5/study_before_workshop.html#what-we-did-in-omics-2-statistical-analysis",
+    "title": "Independent Study to prepare for workshop",
+    "section": "What we did in Omics 2: Statistical Analysis",
+    "text": "What we did in Omics 2: Statistical Analysis\n\n\ncarried out differential expression analysis\nfound genes not expressed at all, or expressed in one group only\nSaved results files"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#where-should-you-be-1",
+    "href": "omics/week-5/study_before_workshop.html#where-should-you-be-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Where should you be?",
+    "text": "Where should you be?\nAfter the Omics 2: 👋 Statistical Analysis Workshop including:\n\n🤗 Look after future you! and\nthe Independent Study to consolidate, you should have:"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#frogs",
+    "href": "omics/week-5/study_before_workshop.html#frogs",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🐸 Frogs",
+    "text": "🐸 Frogs\n\n\nAn RStudio Project called frogs-88H which contains:\n\nRaw data (S14, S20 and S30)\nProcessed data (s30_filtered.csv, s30_summary_gene.csv, s30_summary_gene_filtered.csv, s30_summary_samp.csv and equivalents for S14 OR S20)\nResults files (s30_fgf_only.csv, S30_normalised_counts.csv, S30_results.csv and equivalents for S14 OR S20)\n\nTwo scripts called cont-fgf-s30.R and either cont-fgf-s20.R OR cont-fgf-s14.R\n\n\n\n\n\nFiles should be organised into folders. Code should well commented and easy to read."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#mice",
+    "href": "omics/week-5/study_before_workshop.html#mice",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🐭 Mice",
+    "text": "🐭 Mice\n\n\nAn RStudio Project called mice-88H which contains\n\nRaw data (hspc, prog, lthsc)\nProcessed data (hspc_summary_gene.csv, hspc_summary_samp.csv, prog_summary_gene.csv, prog_summary_samp.csv, lthsc_summary_gene.csv, lthsc_summary_samp.csv)\n\n\nResults files (prog_hspc_results.csv and an equivalent for lthsc vs prog or hspc vs lthsc)\nTwo scripts called hspc-prog.R and either hspc-lthsc.R OR prog-lthsc.R\n\n\n\nFiles should be organised into folders. Code should well commented and easy to read."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#section",
+    "href": "omics/week-5/study_before_workshop.html#section",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🍂",
+    "text": "🍂\nEither of the other examples."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#if-you-do-not-have-those",
+    "href": "omics/week-5/study_before_workshop.html#if-you-do-not-have-those",
+    "title": "Independent Study to prepare for workshop",
+    "section": "If you do not have those",
+    "text": "If you do not have those\nGo through:\n\nOmics 2: Statistical Analysis including:\n🤗 Look after future you! and\nthe Independent Study to consolidate"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#examine-the-results-files-1",
+    "href": "omics/week-5/study_before_workshop.html#examine-the-results-files-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Examine the results files",
+    "text": "Examine the results files\nRemind yourself of the key columns you have in the results files:\n\na log2 fold change\nan unadjusted p-value\na p value adjusted for multiple testing (FDR or padj)\na gene id"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#frogs-1",
+    "href": "omics/week-5/study_before_workshop.html#frogs-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🐸 Frogs",
+    "text": "🐸 Frogs\n\n\nRows: 10,136\nColumns: 7\n$ baseMean        <dbl> 237.553928, 531.565700, 86.392830, 49.813502, 419.9983…\n$ log2FoldChange  <dbl> 0.096601855, -0.089588528, -0.192811203, -0.008858703,…\n$ lfcSE           <dbl> 0.2079396, 0.1557384, 0.3253216, 0.4342614, 0.1685420,…\n$ stat            <dbl> 0.46456683, -0.57525007, -0.59267874, -0.02039947, -0.…\n$ pvalue          <dbl> 0.64224169, 0.56512218, 0.55339617, 0.98372471, 0.8699…\n$ padj            <dbl> 0.9998970, 0.9998970, 0.9998970, 0.9998970, 0.9998970,…\n$ xenbase_gene_id <chr> \"XB-GENE-1000007\", \"XB-GENE-1000023\", \"XB-GENE-1000062…\n\n\n\n\n\nbaseMean is the mean of the normalised counts for the gene across all samples\n\nlfcSE standard error of the fold change\n\nstat is the test statistic (the Wald statistic)\nGenerated by DESeq2 (Love, Huber, and Anders 2014)"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#mice-1",
+    "href": "omics/week-5/study_before_workshop.html#mice-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🐭 Mice",
+    "text": "🐭 Mice\n\n\nRows: 280\nColumns: 6\n$ Top             <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…\n$ p.value         <dbl> 7.038138e-117, 4.736622e-90, 1.832630e-88, 4.211954e-7…\n$ FDR             <dbl> 1.970679e-114, 6.631271e-88, 1.710455e-86, 2.948368e-7…\n$ summary.logFC   <dbl> 1.596910, 3.035165, 3.261056, -2.146491, -3.056730, 3.…\n$ logFC.hspc      <dbl> 1.596910, 3.035165, 3.261056, -2.146491, -3.056730, 3.…\n$ ensembl_gene_id <chr> \"ENSMUSG00000028639\", \"ENSMUSG00000024053\", \"ENSMUSG00…\n\n\n\n\nTop is the rank of the gene ordered by the p-value (smallest first)\n\nsummary.logFC and logFC.hspc give the same value (in this case since comparing two cell types)\ngenerated by scran (Lun, McCarthy, and Marioni 2016)"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#from-xenbase",
+    "href": "omics/week-5/study_before_workshop.html#from-xenbase",
+    "title": "Independent Study to prepare for workshop",
+    "section": "from xenbase",
+    "text": "from xenbase\n\nxenbase logoXenbase (http://www.xenbase.org/, RRID:SCR_003280)\nXenbase is a model organism database that provides genomic, molecular, and developmental biology information about Xenopus laevis and Xenopus tropicalis. Xenbase is funded by the National Institutes of Health (NIH) and the National Science Foundation (NSF).\nour data gives the xenbase gene id so we are using xenbase to get the information a lot of the information would also be in the ncbi"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#from-the-ncbi",
+    "href": "omics/week-5/study_before_workshop.html#from-the-ncbi",
+    "title": "Independent Study to prepare for workshop",
+    "section": "from the ncbi",
+    "text": "from the ncbi\nbiomart is a package that allows you to get information from the ncbi database such as gene names and descriptions"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#plots-purpose",
+    "href": "omics/week-5/study_before_workshop.html#plots-purpose",
+    "title": "Independent Study to prepare for workshop",
+    "section": "plots purpose",
+    "text": "plots purpose\ndimension reduction"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#pca",
+    "href": "omics/week-5/study_before_workshop.html#pca",
+    "title": "Independent Study to prepare for workshop",
+    "section": "PCA",
+    "text": "PCA\n\n\nPrincipal Component Analysis is an unsupervised machine learning technique\nUnsupervised methods1 are unsupervised in that they do not use/optimise to a particular output. The goal is to uncover structure. They do not test hypotheses\nIt is often used to visualise high dimensional data because it is a dimension reduction technique\n\n\nYou may wish to read a previous introduction to unsupervised methods I have written An introduction to Machine Learning: Unsupervised methods (Rand 2021)"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#tsne",
+    "href": "omics/week-5/study_before_workshop.html#tsne",
+    "title": "Independent Study to prepare for workshop",
+    "section": "tsne",
+    "text": "tsne\nlots of variables and lots of observations"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#normalising-before-plotting",
+    "href": "omics/week-5/study_before_workshop.html#normalising-before-plotting",
+    "title": "Independent Study to prepare for workshop",
+    "section": "normalising before plotting",
+    "text": "normalising before plotting\nlog\nnormalisation regularised log is a method to bias from low count genes. https://hbctraining.github.io/DGE_workshop_salmon_online/lessons/03_DGE_QC_analysis.html\n\n\nT\n\n\nrlog is a method to bias from low count genes. https://hbctraining.github.io/DGE_workshop_salmon_online/lessons/03_DGE_QC_analysis.html gives a good explanation of regularized the log transform (rlog)\nThe rlog transformation of the normalized counts is only necessary for these visualization methods during this quality assessment. They are not used for DE because DESeq2 takes care of that\nin the workshop we just to log transformed\n\nThe 🐭 mouse data have been normalised to simplify the analysis for you; the 🐸 frog data have not but the DE method will do this for you."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#packages-to-install-before-the-workshop",
+    "href": "omics/week-5/study_before_workshop.html#packages-to-install-before-the-workshop",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Packages to install before the workshop",
+    "text": "Packages to install before the workshop\nheatmaply ggrepel from CRAN in the the normal way:\n\ninstall.packages(\"heatmaply\")\ninstall.packages(\"ggrepel\")\n\nbiomaRt from Bioconductor using BiocManager:\n\nBiocManager::install(\"biomaRt\")"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#workshops-1",
+    "href": "omics/week-5/study_before_workshop.html#workshops-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Workshops",
+    "text": "Workshops\n\nOmics 1: Hello data Getting to know the data. Checking the distributions of values\nOmics 2: Statistical Analysis Identifying which genes are differentially expressed between treatments.\nOmics 3: Visualising and Interpreting. PCA, Volcano plots and heatmaps to visualise results. Interpreting the results and finding out more about genes of interest."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#references",
+    "href": "omics/week-5/study_before_workshop.html#references",
+    "title": "Independent Study to prepare for workshop",
+    "section": "References",
+    "text": "References\n\n\n🔗 About Omics 3: Visualising and Interpreting\n\n\n\nBirney, Ewan, T. Daniel Andrews, Paul Bevan, Mario Caccamo, Yuan Chen, Laura Clarke, Guy Coates, et al. 2004. “An Overview of Ensembl.” Genome Research 14 (5): 925–28. https://doi.org/10.1101/gr.1860604.\n\n\nDurinck, Steffen, Paul T. Spellman, Ewan Birney, and Wolfgang Huber. 2009. “Mapping Identifiers for the Integration of Genomic Datasets with the r/Bioconductor Package biomaRt” 4.\n\n\nFisher, Malcolm, Christina James-Zorn, Virgilio Ponferrada, Andrew J Bell, Nivitha Sundararaj, Erik Segerdell, Praneet Chaturvedi, et al. 2023. “Xenbase: Key Features and Resources of the Xenopus Model Organism Knowledgebase.” Genetics 224 (1): iyad018. https://doi.org/10.1093/genetics/iyad018.\n\n\nLove, Michael I., Wolfgang Huber, and Simon Anders. 2014. “Moderated Estimation of Fold Change and Dispersion for RNA-Seq Data with DESeq2” 15: 550. https://doi.org/10.1186/s13059-014-0550-8.\n\n\nLun, Aaron T. L., Davis J. McCarthy, and John C. Marioni. 2016. “A Step-by-Step Workflow for Low-Level Analysis of Single-Cell RNA-Seq Data with Bioconductor” 5: 2122. https://doi.org/10.12688/f1000research.9501.2.\n\n\nRand, Emma. 2021. Data Science Strand of BIO00058M. https://doi.org/10.5281/zenodo.5527705."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#adding-gene-information-1",
+    "href": "omics/week-5/study_before_workshop.html#adding-gene-information-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Adding gene information",
+    "text": "Adding gene information\n\n\nThe gene id is difficult to interpret in plots/tables\nTherefore we need to add information such as the gene name and a description to the results\nFor the 🐸 Frog data information comes from Xenbase (Fisher et al. 2023)\nFor the 🐭 Mice data information comes from Ensembl (Birney et al. 2004)"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#xenbase",
+    "href": "omics/week-5/study_before_workshop.html#xenbase",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🐸 Xenbase",
+    "text": "🐸 Xenbase\n\nxenbase logoXenbase is a model organism database that provides genomic, molecular, and developmental biology information about Xenopus laevis and Xenopus tropicalis.\n\nIt took me some time to find the information you need."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#xenbase-1",
+    "href": "omics/week-5/study_before_workshop.html#xenbase-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🐸 Xenbase",
+    "text": "🐸 Xenbase\n\n\nI got the information from the Xenbase information pages under Data Reports | Gene Information\nThis is listed: Xenbase Gene Product Information [readme] gzipped gpi (tab separated)\nClick on the readme link to see the file format and columns\nI downloaded xenbase.gpi.gz, unzipped it, removed header lines and the Xenopus tropicalis (taxon:8364) entries and saved it as xenbase_info.xlsx\nIn the workshop you will import this file and merge the information with the results file"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#ensembl",
+    "href": "omics/week-5/study_before_workshop.html#ensembl",
+    "title": "Independent Study to prepare for workshop",
+    "section": "🐭 Ensembl",
+    "text": "🐭 Ensembl\n\n\nEnsembl creates, integrates and distributes reference datasets and analysis tools that enable genomics\nBioMart provides a access to these large datasets\nbiomaRt (Durinck et al. 2009) is a Bioconductor package gives you programmatic access to BioMart.\nIn the workshop you use this package to get information you can merge with the results file"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#what-is-the-purpose-of-an-omics-plot",
+    "href": "omics/week-5/study_before_workshop.html#what-is-the-purpose-of-an-omics-plot",
+    "title": "Independent Study to prepare for workshop",
+    "section": "What is the purpose of an Omics plot?",
+    "text": "What is the purpose of an Omics plot?\n\n\nIn general, we plot data to help us summarise and understand it\nThis is especially important for omics data where we have a very large number of variables and often a large number of observations\nWe will look at three plots very commonly used in omics analysis: Principal Component Analysis (PCA) plot, Heatmaps and Volcano Plots"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#pca-1",
+    "href": "omics/week-5/study_before_workshop.html#pca-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "PCA",
+    "text": "PCA\n\n\nIt takes a large number of continuous variables (like gene expression) and reduces them to a smaller number of variables (called principal components) that explain most of the variation in the data\nThe principal components can be plotted to see how samples cluster together"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#pca-2",
+    "href": "omics/week-5/study_before_workshop.html#pca-2",
+    "title": "Independent Study to prepare for workshop",
+    "section": "PCA",
+    "text": "PCA\n\nTo see if samples cluster as we would expect, we might plot the expression of one gene against another\n\n\n\n\n\n\nSamples\n\n\n\n\n\n\nCells\n\n\n\n\n\nThis gives some insight but we have 280 (mice) or 10,000+(frogs) genes to consider. How do we know if the pair we use is typical? How can we consider al the genes at once?"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#heatmaps-1",
+    "href": "omics/week-5/study_before_workshop.html#heatmaps-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Heatmaps",
+    "text": "Heatmaps\n\n\nare a grid of genes on one axis and samples on the other with each grid cell coloured by another variable\nin this case the other variable is gene expression\nthey allow you to quickly get an overview of the expression patterns across genes and samples\nwe often couple them with clustering to group genes and samples with similar expression patterns together which helps us see which genes are responsible for distinguishing groups"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#volcano-plots-1",
+    "href": "omics/week-5/study_before_workshop.html#volcano-plots-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Volcano plots",
+    "text": "Volcano plots\n\n\nVolcano plots are often used to visualise the results of differential expression analysis\nThey are just a scatter of the corrected p value against the fold change….\nalmost - we actually plot the negative log of the corrected p value against the fold change"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#volcano-plots-2",
+    "href": "omics/week-5/study_before_workshop.html#volcano-plots-2",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Volcano plots",
+    "text": "Volcano plots\n\n\nThis is because just plotting the p-value means the axis is counter intuitive. Small p-values (i.e., significant values) are at the bottom of the axis)\nAnd since p-values range from 1 to very tiny the points are all squashed at the bottom of the axis\n\n\n\nVolcano plot FDR against fold change"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#volcano-plots-3",
+    "href": "omics/week-5/study_before_workshop.html#volcano-plots-3",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Volcano plots",
+    "text": "Volcano plots\n\n\nPlotting the negative log of the corrected p-value means that the values are spread out and the significant values are at the top of the axis\n\n\n\nVolcano plot -log(FDR) against fold change"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#visualisations",
+    "href": "omics/week-5/study_before_workshop.html#visualisations",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Visualisations",
+    "text": "Visualisations\n\nShould be done on normalised data so meaningful comparisons can be made\nThe 🐭 mouse data were already log2normalised\nThe 🐸 frog data were normalised by the DE method and saved to file. We will log2 transform before doing visualisations"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#heatmaps-2",
+    "href": "omics/week-5/study_before_workshop.html#heatmaps-2",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Heatmaps",
+    "text": "Heatmaps\n\n\nOn the vertical axis are genes which are differentially expressed at the 0.01 level\nOn the horizontal axis are samples\nWe can see that the FGF-treated samples cluster together and the control samples cluster together\nWe can also see two clusters of genes; one of these shows genes upregulated (more yellow) in the FGF-treated samples and the other shows genes downregulated (more blue) in the FGF-treated samples"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#heatmaps-3",
+    "href": "omics/week-5/study_before_workshop.html#heatmaps-3",
+    "title": "Independent Study to prepare for workshop",
+    "section": "Heatmaps",
+    "text": "Heatmaps\n\n\nOn the vertical axis are genes which are differentially expressed at the 0.01 level\nOn the horizontal axis are samples\nWe can see that the FGF-treated samples cluster together and the control samples cluster together\nWe can also see two clusters of genes; one of these shows genes upregulated (more yellow) in the FGF-treated samples and the other shows genes downregulated (more blue) in the FGF-treated samples"
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#pca-3",
+    "href": "omics/week-5/study_before_workshop.html#pca-3",
+    "title": "Independent Study to prepare for workshop",
+    "section": "PCA",
+    "text": "PCA\n\nPCA is a solution for this - It takes a large number of continuous variables (like gene expression) and reduces them to a smaller number of “principal components” that explain most of the variation in the data.\n\n\n\n\n\n\nSamples\n\n\n\n\n\n\nCells\n\n\n\n\n\nWe have done PCA in Omics 3, but often PCA might be one of the first exploratory steps because it gives you an idea whether you expect general patterns in gene expression that distinguish groups."
+  },
+  {
+    "objectID": "omics/week-5/study_before_workshop.html#section-1",
+    "href": "omics/week-5/study_before_workshop.html#section-1",
+    "title": "Independent Study to prepare for workshop",
+    "section": "",
+    "text": "Heat map for the frog data\n\n\nSee next slide for information"
   }
 ]
\ No newline at end of file
diff --git a/_site/site_libs/quarto-html/quarto-html.min.css b/_site/site_libs/quarto-html/quarto-html.min.css
index 8b13789..c2857c3 100644
--- a/_site/site_libs/quarto-html/quarto-html.min.css
+++ b/_site/site_libs/quarto-html/quarto-html.min.css
@@ -1 +1 @@
-
+/*# sourceMappingURL=0a6b880beb84f9b6f36107a76f82c5b1.css.map */
diff --git a/omics/crib/cont-fgf-s30.R b/omics/crib/cont-fgf-s30.R
index c648943..18e4bb4 100644
--- a/omics/crib/cont-fgf-s30.R
+++ b/omics/crib/cont-fgf-s30.R
@@ -458,50 +458,20 @@ s30_log2_trans <- s30_results |>
 colnames(s30_log2_trans) <- s30_results$xenbase_gene_id
 
 # just for indep study before
-# s30_log2_trans$sample <- row.names(s30_log2_trans)
-# a <- s30_log2_trans |> ggplot(aes(x = `XB-GENE-1000007`, 
+# sample_id <- row.names(s30_log2_trans) |> str_remove("log2_")
+# fig <- s30_log2_trans |> ggplot(aes(x = `XB-GENE-1000007`,
 #                              y = `XB-GENE-1000023`)) +
 #   geom_point() +
-#   geom_text(aes(label = sample), 
+#   geom_text(aes(label = sample_id),
 #             vjust = -1, size = 3) +
 #   scale_x_continuous(expand = c(0.05,0.05)) +
 #   scale_y_continuous(expand = c(0.05,0.05)) +
 #   theme_classic()
 # 
 # 
-# b <- s30_log2_trans |> ggplot(aes(x = `XB-GENE-1000062`, 
-#                                   y = `XB-GENE-1000072`)) +
-#   geom_point() +
-#   geom_text(aes(label = sample), 
-#             vjust = -1, size = 3) +
-#   scale_x_continuous(expand = c(0.05,0.05)) +
-#   scale_y_continuous(expand = c(0.05,0.05)) +
-#   theme_classic()
-# 
-# c <- s30_log2_trans |> ggplot(aes(x = `XB-GENE-1000113`, 
-#                                   y = `XB-GENE-1000132`)) +
-#   geom_point() +
-#   geom_text(aes(label = sample), 
-#             vjust = -1, size = 3) +
-#   scale_x_continuous(expand = c(0.05,0.05)) +
-#   scale_y_continuous(expand = c(0.05,0.05)) +
-#   theme_classic()
-# 
-# d <- s30_log2_trans |> ggplot(aes(x = `XB-GENE-1000149`, 
-#                                   y = `XB-GENE-1000251`)) +
-#   geom_point() +
-#   geom_text(aes(label = sample), 
-#             vjust = -1, size = 3) +
-#   scale_x_continuous(expand = c(0.05,0.05)) +
-#   scale_y_continuous(expand = c(0.05,0.05)) +
-#   theme_classic()
-# 
-# library(patchwork)
-# fig <- (a + b) / (c + d)
-# 
-# ggsave("omics/week-5/images/why_pca.png", 
+# ggsave("omics/week-5/images/why_pca_frog.png",
 #        plot = fig,
-#        width = 6, height = 6)
+#        width = 4, height = 4)
 
 # perform PCA using standard functions
 pca <- s30_log2_trans |>
diff --git a/omics/crib/hspc-prog.R b/omics/crib/hspc-prog.R
index 637a7c4..bd2cf4c 100644
--- a/omics/crib/hspc-prog.R
+++ b/omics/crib/hspc-prog.R
@@ -509,6 +509,30 @@ prog_hspc_trans <- prog_hspc_results |>
 
 colnames(prog_hspc_trans) <- prog_hspc_results$ensembl_gene_id
 
+# just for indep study before
+# prog_hspc_trans$cell_id <- row.names(prog_hspc_trans)
+# prog_hspc_trans <- prog_hspc_trans |> 
+#   extract(cell_id, 
+#           remove = FALSE,
+#           c("cell_type", "cell_number"),
+#           "([a-zA-Z]{4})_([0-9]{3})")
+# 
+# fig <- prog_hspc_trans |> ggplot(aes(x = ENSMUSG00000028639,
+#                              y = ENSMUSG00000024053, colour = cell_type)) +
+#   geom_point() +
+#   # geom_text(aes(label = cell_id),
+#   #           vjust = -1, size = 3) +
+#   scale_x_continuous(expand = c(0.05,0.05)) +
+#   scale_y_continuous(expand = c(0.05,0.05)) +
+#   theme_classic() +
+# theme(legend.position = "none")
+# 
+# 
+# ggsave("omics/week-5/images/why_pca_mouse.png",
+#        plot = fig,
+#        width = 4, height = 4)
+
+
 # perform PCA using standard functions
 pca <- prog_hspc_trans |>
   prcomp(scale. = TRUE) 
@@ -625,3 +649,16 @@ ggsave("omics/week-5/figures/prog-hspc-volcano.png",
        units = "in",
        device = "png")
 
+
+# # just for the independent study slides
+# vol <- prog_hspc_results |> 
+#   ggplot(aes(x = summary.logFC, 
+#              y = FDR)) +
+#   geom_point() +
+#   theme_classic() 
+# ggsave("omics/week-5/images/volcano-why.png",
+#        plot = vol,
+#        height = 4.5, 
+#        width = 4.5,
+#        units = "in",
+#        device = "png")
\ No newline at end of file
diff --git a/omics/week-5/images/frog-heat.png b/omics/week-5/images/frog-heat.png
new file mode 100644
index 0000000..8cf1b5f
Binary files /dev/null and b/omics/week-5/images/frog-heat.png differ
diff --git a/omics/week-5/images/volcano-why.png b/omics/week-5/images/volcano-why.png
new file mode 100644
index 0000000..26b786b
Binary files /dev/null and b/omics/week-5/images/volcano-why.png differ
diff --git a/omics/week-5/images/why_pca.png b/omics/week-5/images/why_pca.png
deleted file mode 100644
index dd45e6c..0000000
Binary files a/omics/week-5/images/why_pca.png and /dev/null differ
diff --git a/omics/week-5/images/why_pca_frog.png b/omics/week-5/images/why_pca_frog.png
new file mode 100644
index 0000000..14bb9f6
Binary files /dev/null and b/omics/week-5/images/why_pca_frog.png differ
diff --git a/omics/week-5/images/why_pca_mouse.png b/omics/week-5/images/why_pca_mouse.png
new file mode 100644
index 0000000..04ef843
Binary files /dev/null and b/omics/week-5/images/why_pca_mouse.png differ
diff --git a/omics/week-5/overview.qmd b/omics/week-5/overview.qmd
index 60313b4..80982a4 100644
--- a/omics/week-5/overview.qmd
+++ b/omics/week-5/overview.qmd
@@ -5,7 +5,7 @@ toc: true
 toc-location: right
 ---
 
-This week we cover how to visualise and interpret the results of your differential expression analysis. The independent study will allow you to check you have what you should have following the [Omics 2: Statistical Analysis workshop](../week-4/workshop.html) and [Consolidation study](../week-4/study_after_workshop.html). It will also summarise the the methods and plots we will go through in the workshop. In the workshop, we will learn how to conduct a Principle Component Analysis (PCA) and plot the results as well as how to create a nicely formatted Volcano plot and heatmap. We will also consider three factors that help us choose an interesting/important gene: the absolute expression, the fold change and the adjusted p-value. 
+This week we cover how to visualise and interpret the results of your differential expression analysis. The independent study will allow you to check you have what you should have following the [Omics 2: Statistical Analysis workshop](../week-4/workshop.html) and [Consolidation study](../week-4/study_after_workshop.html). It will also summarise the methods and plots we will go through in the workshop. In the workshop, we will learn how to conduct a Principal Component Analysis (PCA) and plot the results as well as how to create a nicely formatted Volcano plot and heatmap.  
 
 We suggest you sit together with your group in the workshop.
 
@@ -14,12 +14,11 @@ We suggest you sit together with your group in the workshop.
 The successful student will be able to:
 
 -   verify they have the required RStudio Project set up and the data and code files from the previous Workshop and Consolidation study
--   explain 
--   
--   
--   
--   
--   ,
+-   explain where gene information came from and add it to their results
+-   perform a PCA and understand how to interpret it
+-   create a heatmap and understand how to interpret it
+-   create a volcano plot and understand how to interpret it
+  
 
 ### Instructions
 
@@ -29,11 +28,13 @@ The successful student will be able to:
 
 2.  [Workshop](workshop.qmd)
 
-    i.  💻 ....
+    i.  💻 Add gene information to the results of DE
 
-    ii. 💻 ...
+    ii. 💻 Perform and plot a PCA
 
-    iii. 💻 ....
+    iii. 💻 Visualise results with a heatmap
+    
+    iv. 💻 Visualise all the results with a volcano plot
 
     iv. Look after future you!
 
diff --git a/omics/week-5/study_before_workshop.qmd b/omics/week-5/study_before_workshop.qmd
index 9a84a68..68198fd 100644
--- a/omics/week-5/study_before_workshop.qmd
+++ b/omics/week-5/study_before_workshop.qmd
@@ -15,6 +15,12 @@ editor:
     wrap: 72
 ---
 
+```{r}
+#| include: false
+library(tidyverse)
+
+```
+
 ## Overview
 
 In these slides we will:
@@ -22,9 +28,11 @@ In these slides we will:
 ::: incremental
 -   Check where you are
 
--   learn some concepts
+-   learn some concepts used in omics visualisation
 
-    -   
+    -   Principal Component Analysis (PCA)
+    -   Volcano plots
+    -   Heatmaps
 
 -   Find out what packages to install before the workshop
 :::
@@ -34,13 +42,11 @@ In these slides we will:
 ## What we did in Omics 2: Statistical Analysis
 
 ::: incremental
-::: {style="font-size: 90%;"}
--   
+-   carried out differential expression analysis
 
--   
+-   found genes not expressed at all, or expressed in one group only
 
--   Saved files .
-:::
+-   Saved results files
 :::
 
 ## Where should you be?
@@ -56,14 +62,16 @@ Workshop](../week-4/workshop.html) including:
 
 ## 🐸 Frogs
 
-::: {style="font-size: 90%;"}
+::: {style="font-size: 70%;"}
 -   An RStudio Project called `frogs-88H` which contains:
     -   Raw data (S14, S20 and S30)
     -   Processed data (`s30_filtered.csv`, `s30_summary_gene.csv`,
         `s30_summary_gene_filtered.csv`, `s30_summary_samp.csv` and
         equivalents for S14 *OR* S20)
-    -   Two scripts called `cont-fgf-s30.R` and `cont-fgf-s20.R` *OR*
-        `cont-fgf-s14.R`
+    -   Results files (`s30_fgf_only.csv`, `S30_normalised_counts.csv`,
+        `S30_results.csv` and equivalents for S14 *OR* S20)\
+    -   Two scripts called `cont-fgf-s30.R` and either `cont-fgf-s20.R`
+        *OR* `cont-fgf-s14.R`
 :::
 
 Files should be organised into folders. Code should well commented and
@@ -71,12 +79,18 @@ easy to read.
 
 ## 🐭 Mice
 
+::: {style="font-size: 70%;"}
 -   An RStudio Project called `mice-88H` which contains
     -   Raw data (hspc, prog, lthsc)
     -   Processed data (`hspc_summary_gene.csv`,
         `hspc_summary_samp.csv`, `prog_summary_gene.csv`,
-        `prog_summary_samp.csv`)
--   One script called `hspc-prog.R`
+        `prog_summary_samp.csv`, `lthsc_summary_gene.csv`,
+        `lthsc_summary_samp.csv`)
+-   Results files (`prog_hspc_results.csv` and an equivalent for lthsc
+    vs prog or hspc vs lthsc)
+-   Two scripts called `hspc-prog.R` and either `hspc-lthsc.R` *OR*
+    `prog-lthsc.R`
+:::
 
 Files should be organised into folders. Code should well commented and
 easy to read.
@@ -101,89 +115,294 @@ Go through:
 
 ## Examine the results files
 
+Remind yourself of the key columns you have in the results files:
+
+-   a log~2~ fold change
+-   an unadjusted *p*-value
+-   a *p* value adjusted for multiple testing (`FDR` or `padj`)
+-   a gene id
+
+## 🐸 Frogs
+
+```{r}
+#| echo: false
+read_csv("results/s30_results.csv") |> glimpse()
+
+```
+
+. . .
+
+-   `baseMean` is the mean of the normalised counts for the gene across
+    all samples
+-   `lfcSE` standard error of the fold change
+-   `stat` is the test statistic (the Wald statistic)
+-   Generated by **`DESeq2`** [@DESeq2]
+
+## 🐭 Mice
+
+```{r}
+#| echo: false
+read_csv("results/prog_hspc_results.csv") |> glimpse()
+
+
+```
+
+. . .
 
+-   `Top` is the rank of the gene ordered by the *p*-value (smallest
+    first)
+-   `summary.logFC` and `logFC.hspc` give the same value (in this case
+    since comparing two cell types)
+-   generated by **`scran`** [@scran]
 
 # Adding gene information
 
-## from xenbase
+## Adding gene information
+
+::: incremental
+-   The gene id is difficult to interpret in plots/tables
+
+-   Therefore we need to add information such as the gene name and a
+    description to the results
+
+-   For the 🐸 Frog data information comes from Xenbase [@fisher2023]
+
+-   For the 🐭 Mice data information comes from Ensembl [@birney2004]
+:::
+
+## 🐸 Xenbase
+
+![xenbase logo](images/Xenbase-Logo-Medium.png){width="800"}
+
+[Xenbase](http://www.xenbase.org/) is a model organism database that
+provides genomic, molecular, and developmental biology information about
+*Xenopus laevis* and *Xenopus tropicalis*.
+
+. . .
+
+It took me some time to find the information you need.
+
+## 🐸 Xenbase
+
+::: incremental
+-   I got the information from the [Xenbase information
+    pages](https://www.xenbase.org/xenbase/static-xenbase/ftpDatafiles.jsp)
+    under Data Reports \| Gene Information
+
+-   This is listed: Xenbase Gene Product Information \[readme\] [gzipped
+    gpi (tab
+    separated)](https://download.xenbase.org/xenbase/GenePageReports/xenbase.gpi.gz)
+
+-   Click on the readme link to see the file format and columns
+
+-   I downloaded
+    [xenbase.gpi.gz](https://download.xenbase.org/xenbase/GenePageReports/xenbase.gpi.gz),
+    unzipped it, removed header lines and the *Xenopus tropicalis*
+    (taxon:8364) entries and saved it as
+    [xenbase_info.xlsx](meta/xenbase_info.xlsx)
+
+-   In the workshop you will import this file and merge the information
+    with the results file
+:::
+
+## 🐭 Ensembl
+
+::: incremental
+-   [Ensembl](https://www.ensembl.org/index.html) creates, integrates
+    and distributes reference datasets and analysis tools that enable
+    genomics
+
+-   [BioMart](https://grch37.ensembl.org/info/data/biomart/index.html)
+    provides access to these large datasets
+
+-   **`biomaRt`** [@biomaRt] is a Bioconductor package that gives you
+    programmatic access to BioMart.
+
+-   In the workshop you will use this package to get information you can
+    merge with the results file
+:::
+
+# Plots
+
+## What is the purpose of an Omics plot?
+
+::: incremental
+-   In general, we plot data to help us summarise and understand it
+
+-   This is especially important for omics data where we have a very large
+    number of variables and often a large number of observations
+
+-   We will look at three plots very commonly used in omics analysis:
+    Principal Component Analysis (PCA) plot, Heatmaps and Volcano Plots
+:::
+
+# Principal Component Analysis (PCA)
+
+## PCA
+
+::: incremental
+-   Principal Component Analysis is an unsupervised machine learning
+    technique
+
+-   Unsupervised methods[^1] are unsupervised in that they do not
+    use/optimise to a particular output. The goal is to uncover
+    structure. They do not test hypotheses
+
+-   It is often used to visualise high dimensional data because it is a
+    dimension reduction technique
+:::
+
+[^1]: You may wish to read a previous introduction to unsupervised
+    methods I have written [An introduction to Machine Learning:
+    Unsupervised
+    methods](https://3mmarand.github.io/BIO00058M-Data-science-2020/slides/05_intro_to_ML_unsupervised.html#1)
+    [@rand2021]
+
+## PCA
+
+::: incremental
+-   It takes a large number of continuous variables (like gene
+    expression) and reduces them to a smaller number of variables
+    (called principal components) that explain most of the variation in
+    the data
 
+-   The principal components can be plotted to see how samples cluster
+    together
+:::
 
-![xenbase logo](images/Xenbase-Logo-Medium.png){width="700"}
+## PCA
+::: {style="font-size: 70%;"}
+-   To see if samples cluster as we would expect, we might plot the
+    expression of one gene against another
 
+::: {layout-ncol="2"}
+![Samples](images/why_pca_frog.png){width="300"}
 
-Xenbase (http://www.xenbase.org/, RRID:SCR_003280)
+![Cells](images/why_pca_mouse.png){width="300"}
+:::
 
-Xenbase is a model organism database that provides genomic, molecular,
-and developmental biology information about Xenopus laevis and Xenopus
-tropicalis. Xenbase is funded by the National Institutes of Health
-(NIH) and the National Science Foundation (NSF).
+This gives some insight but we have 280 (mice) or 10,000+ (frogs) genes
+to consider. How do we know if the pair we use is typical? How can we
+consider all the genes at once?
 
-our data gives the xenbase gene id so we are using xenbase to get the information
-a lot of the information would also be in the ncbi
+:::
 
-## from the ncbi
+## PCA
 
-biomart is a package that allows you to get information from the ncbi
-database such as gene names and descriptions
+::: {style="font-size: 70%;"}
+-   PCA is a solution for this - It takes a large number of continuous
+    variables (like gene expression) and reduces them to a smaller
+    number of "principal components" that explain most of the variation
+    in the data.
 
+::: {layout-ncol="2"}
 
+![Samples](figures/frog-s30-pca.png){width="300"}
 
+![Cells](figures/prog_hspc-pca.png){width="300"}
+:::
 
 
+:::
+## PCA
 
-# Plots 
+We have done PCA in Omics 3, but PCA is often one of the first
+exploratory steps because it gives you an idea of whether you can expect
+general patterns in gene expression that distinguish groups.
 
-## plots purpose
+# Heatmaps
 
-dimsenion reduction
+## Heatmaps
 
-## pca
+::: incremental
+-   are a grid of genes on one axis and samples on the other with each
+    grid cell coloured by another variable
+
+-   in this case the other variable is gene expression
 
-lots of variables 
+-   they allow you to quickly get an overview of the expression patterns
+    across genes and samples
 
-## tsne
+-   we often couple them with clustering to group genes and samples with
+    similar expression patterns together which helps us see which genes
+    are responsible for distinguishing groups
+:::
 
-lots of variables and lots of observations
+## 
 
+![Heat map for the frog data](images/frog-heat.png){height="800"}
 
+See next slide for information
 
-# normalsing before plotting
+## Heatmaps
+
+::: incremental
+-   On the vertical axis are genes which are differentially expressed at
+    the 0.01 level
 
-## normalising before plotting
+-   On the horizontal axis are samples
 
-log 
+-   We can see that the FGF-treated samples cluster together and the
+    control samples cluster together
 
-normalisation
-regularised log is a method to bias from low count genes.
-https://hbctraining.github.io/DGE_workshop_salmon_online/lessons/03_DGE_QC_analysis.html
+-   We can also see two clusters of genes; one of these shows genes
+    upregulated (more yellow) in the FGF-treated samples and the other
+    shows genes downregulated (more blue) in the FGF-treated samples
+:::
 
+# Volcano plots
 
+## Volcano plots
 
 ::: incremental
--   T
+-   Volcano plots are often used to visualise the results of differential
+    expression analysis
+
+-   They are just a scatter of the corrected p value against the fold
+    change....
+
+-   almost - we actually plot the negative log of the corrected p
+    value against the fold change
 :::
 
-rlog is a method to bias from low count genes.
-https://hbctraining.github.io/DGE_workshop_salmon_online/lessons/03_DGE_QC_analysis.html
-gives a good explanation of regularized the log transform (rlog)
+## Volcano plots
 
-The rlog transformation of the normalized counts is only necessary for
-these visualization methods during this quality assessment. They are not
-used for DE because DESeq2 takes care of that
+::: {style="font-size: 70%;"}
+-   This is because just plotting the *p*-value means the axis is
+    counter-intuitive. Small *p*-values (i.e., significant values) are
+    at the bottom of the axis
 
-in the workshop we just to log transformed
+-   And since *p*-values range from 1 to very tiny the points are all
+    squashed at the bottom of the axis
+:::
+
+![Volcano plot FDR against fold change](images/volcano-why.png)
 
--   The 🐭 mouse data have been normalised to simplify the analysis for
-    you; the 🐸 frog data have not but the DE method will do this for
-    you.
-   
+## Volcano plots
 
+::: {style="font-size: 70%;"}
+-   Plotting the negative log of the corrected *p*-value means that the
+    values are spread out and the significant values are at the top of
+    the axis
+:::
 
+![Volcano plot -log(FDR) against fold
+change](figures/prog-hspc-volcano.png)
 
+## Visualisations
+
+-   Should be done on normalised data so meaningful comparisons can be
+    made
+
+-   The 🐭 mouse data were already log~2~ normalised
+
+-   The 🐸 frog data were normalised by the DE method and saved to file.
+    We will log~2~ transform before doing visualisations
 
 ## Packages to install before the workshop
 
-**`heatmaply`** **`ggrepel`** from CRAN in the the normal way:
+**`heatmaply`** [@heatmapply] and **`ggrepel`** [@ggrepel] from CRAN in
+the normal way:
 
 ```{r}
 #| eval: false
@@ -193,15 +412,14 @@ install.packages("ggrepel")
 
 ```
 
+**`biomaRt`** [@biomaRt] from Bioconductor using **`BiocManager`**
+[@BiocManager]
 
-**`biomaRt`** from Bioconductor using **`BiocManager`**:
 ```{r}
 #| eval: false
 BiocManager::install("biomaRt")
 ```
 
-
-
 # Workshops
 
 ## Workshops
diff --git a/omics/week-5/workshop.qmd b/omics/week-5/workshop.qmd
index 21bd7c3..69ee793 100644
--- a/omics/week-5/workshop.qmd
+++ b/omics/week-5/workshop.qmd
@@ -136,9 +136,9 @@ The [xenbase.gpi.gz](https://download.xenbase.org/xenbase/GenePageReports/xenbas
 If you click on the readme link you can see information telling you that the file is in the Gene Product information 2.1 format and is provided with gzip compression. gene product information for both 
 *Xenopus tropicalis* (taxon:8364) and *Xenopus laevis* (taxon:8355)
 
-🎬 ......
+🎬 If you want to 
 
-```{bash}
+```bash
 gunzip xenbase.gpi.gz
 less xenbase.gpi
 q
@@ -346,6 +346,7 @@ n_gene_clusters <- 2
 ```
 
 ```{r}
+#| fig-height: 8
 heatmaply(mat, 
           scale = "row",
           hide_colorbar = TRUE,
@@ -361,7 +362,7 @@ heatmaply(mat,
 
 
 
-## Visual all the results with a volcano plot
+## Visualise all the results with a volcano plot
 
 
 colour the points if padj < 0.05
@@ -706,7 +707,7 @@ separation is not as strong as for the frog data
 run a few times to see different subset
 
 
-## Visual all the results with a volcano plot
+## Visualise all the results with a volcano plot
 
 colour the points if FDR < 0.05
 and prog_hspc_results > 1
diff --git a/references.bib b/references.bib
index 52f2ca1..fa3ab82 100644
--- a/references.bib
+++ b/references.bib
@@ -476,3 +476,67 @@ @article{benjamini1995
 	url = {http://www.jstor.org/stable/2346101},
 	note = {Publisher: [Royal Statistical Society, Wiley]}
 }
+
+@article{fisher2023,
+	title = {Xenbase: key features and resources of the Xenopus model organism knowledgebase},
+	author = {Fisher, Malcolm and James-Zorn, Christina and Ponferrada, Virgilio and Bell, Andrew J and Sundararaj, Nivitha and Segerdell, Erik and Chaturvedi, Praneet and Bayyari, Nadia and Chu, Stanley and Pells, Troy and Lotay, Vaneet and Agalakov, Sergei and Wang, Dong Zhuo and Arshinoff, Bradley I and Foley, Saoirse and Karimi, Kamran and Vize, Peter D and Zorn, Aaron M},
+	year = {2023},
+	month = {05},
+	date = {2023-05-02},
+	journal = {Genetics},
+	pages = {iyad018},
+	volume = {224},
+	number = {1},
+	doi = {10.1093/genetics/iyad018},
+	url = {https://doi.org/10.1093/genetics/iyad018}
+}
+
+@article{birney2004,
+	title = {An Overview of Ensembl},
+	author = {Birney, Ewan and Andrews, T. Daniel and Bevan, Paul and Caccamo, Mario and Chen, Yuan and Clarke, Laura and Coates, Guy and Cuff, James and Curwen, Val and Cutts, Tim and Down, Thomas and Eyras, Eduardo and Fernandez-Suarez, Xose M. and Gane, Paul and Gibbins, Brian and Gilbert, James and Hammond, Martin and Hotz, Hans-Rudolf and Iyer, Vivek and Jekosch, Kerstin and Kahari, Andreas and Kasprzyk, Arek and Keefe, Damian and Keenan, Stephen and Lehvaslaiho, Heikki and McVicker, Graham and Melsopp, Craig and Meidl, Patrick and Mongin, Emmanuel and Pettett, Roger and Potter, Simon and Proctor, Glenn and Rae, Mark and Searle, Steve and Slater, Guy and Smedley, Damian and Smith, James and Spooner, Will and Stabenau, Arne and Stalker, James and Storey, Roy and Ureta-Vidal, Abel and Woodwark, K. Cara and Cameron, Graham and Durbin, Richard and Cox, Anthony and Hubbard, Tim and Clamp, Michele},
+	year = {2004},
+	month = {05},
+	date = {2004-05},
+	journal = {Genome Research},
+	pages = {925--928},
+	volume = {14},
+	number = {5},
+	doi = {10.1101/gr.1860604},
+	url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC479121/},
+	note = {PMID: 15078858
+PMCID: PMC479121}
+}
+
+@article{biomaRt,
+	title = {Mapping identifiers for the integration of genomic datasets with the R/Bioconductor package biomaRt},
+	author = {Durinck, Steffen and Spellman, Paul T. and Birney, Ewan and Huber, Wolfgang},
+	year = {2009},
+	date = {2009},
+	journal = {Nature Protocols}, volume = {4}, number = {8}, pages = {1184--1191}, doi = {10.1038/nprot.2009.97}
+}
+@book{rand2021,
+	title = {Data Science strand of BIO00058M},
+	author = {Rand, Emma},
+	year = {2021},
+	month = {09},
+	date = {2021-09},
+	doi = {10.5281/zenodo.5527705},
+	url = {https://github.com/3mmaRand/BIO00058M-Data-science-2020}
+}
+
+  @Manual{BiocManager,
+    title = {BiocManager: Access the Bioconductor Project Package Repository},
+    author = {Martin Morgan and Marcel Ramos},
+    year = {2023},
+    note = {R package version 1.30.22},
+    url = {https://bioconductor.github.io/BiocManager/},
+  }
+  
+  @Manual{ggrepel,
+    title = {ggrepel: Automatically Position Non-Overlapping Text Labels with
+'ggplot2'},
+    author = {Kamil Slowikowski},
+    year = {2023},
+    note = {R package version 0.9.4},
+    url = {https://github.com/slowkow/ggrepel},
+  }
\ No newline at end of file