From 21f989d3b043278291c4061acb2b3e64ff6357ca Mon Sep 17 00:00:00 2001
From: Emma Rand <7593411+3mmaRand@users.noreply.github.com>
Date: Fri, 15 Dec 2023 12:45:46 +0000
Subject: [PATCH] kelly proj workflow

---
 _quarto.yml                     |   5 +
 omics/kelly/Rplot001.jpg        | Bin 0 -> 4227 bytes
 omics/kelly/data-raw/mol_wt.txt |   9 +
 omics/kelly/data-raw/vfa.csv    |  61 +++++
 omics/kelly/notes.txt           |  20 ++
 omics/kelly/workshop.qmd        | 463 ++++++++++++++++++++++++++++++++
 renv.lock                       |  96 +++----
 7 files changed, 592 insertions(+), 62 deletions(-)
 create mode 100644 omics/kelly/Rplot001.jpg
 create mode 100644 omics/kelly/data-raw/mol_wt.txt
 create mode 100644 omics/kelly/data-raw/vfa.csv
 create mode 100644 omics/kelly/notes.txt
 create mode 100644 omics/kelly/workshop.qmd

diff --git a/_quarto.yml b/_quarto.yml
index f011f6c..5281b33 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -138,6 +138,11 @@ website:
                text: Workshop
              - href: omics/week-5/study_after_workshop.qmd
                text: Consolidate!  
+          - text: ---
+          - section: "Kelly's Project"
+            contents: 
+             - href: omics/kelly/workshop.qmd
+               text: Workshop
     - title: "Images"
       style: "floating"
       contents: 
diff --git a/omics/kelly/Rplot001.jpg b/omics/kelly/Rplot001.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fd0b397d529171826b48cace63d93852a41d06bd
GIT binary patch
literal 4227
zcmex=<NpH&0WUXCHwH#V1_nkTWcYuZ!I^=Bjg6g+m4ls~os*M;i${c)hnt&6Qb?Fz
zL{>^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<<Uft5x!pKI?*u)5A2qOn4Cl5D|1TU|I
zk)n*E5y{~H0R}-1#s`cKm>HEAm;@P_1sVSzVUP#9la&z+7@&ZWiJ66!jh%y&iyNq5
zs{jKNBQrA-3o|P#3ky(nEl{3;MUYiU(a@1iI53f2sZhkIapFP_Wv7h?MT0JWP%%y_
zYU1P)6PJ*bQdLve(9|+9H8Z!cv~qTFb#wRd^a>6M4GWKmj7m;PO-s+n%qlJ^Ei136
ztZHs)ZENr7?3y%r%G7DoXUv?nXz`Mz%a*TLxoXqqEnBy3-?4Mop~FXx9y@;G<f%)S
zuUx%${l?8(4<9{#^7PsB7cXCZ{Pg+D*Kgl{{QL#-7b62R#9KfT;xROT2?G7a#KOYN
z!VdBmBU3pLGYhh?DjKp0IR>&P778mFHFAhJO<cI~Ag8i%&<D|^qKjN&DkcwAKZ3jl
z_8D;=Ya+{MaE~GUb&G+AnGqOy%z_N|4BvlgENb8y#iL;`ng&MGz-Ss6O#`E8U^ESk
Xrh(BkFq#HN)4*sN7;$O9{{Kw?pL}|W

literal 0
HcmV?d00001

diff --git a/omics/kelly/data-raw/mol_wt.txt b/omics/kelly/data-raw/mol_wt.txt
new file mode 100644
index 0000000..83d15fe
--- /dev/null
+++ b/omics/kelly/data-raw/mol_wt.txt
@@ -0,0 +1,9 @@
+vfa mw
+Acetate	60.05
+Propanoate	74.08
+Isobutyrate	88.11
+Butyrate	88.11
+Isopentanoate	102.13
+Pentanoate	102.13
+Isohexanoate	116.1583
+Hexanoate	116.1583
diff --git a/omics/kelly/data-raw/vfa.csv b/omics/kelly/data-raw/vfa.csv
new file mode 100644
index 0000000..5096520
--- /dev/null
+++ b/omics/kelly/data-raw/vfa.csv
@@ -0,0 +1,61 @@
+Sample - Replicate,Time (day),Acetate,Propanoate,Isobutyrate,Butyrate,Isopentanoate,Pentanoate,Isohexanoate,Hexanoate
+CN10-1,1,17.239,1.126,0.733,0.538,0.52,0.076,0.025,0.007
+CN10-2,1,19.805,1.153,0.736,0.731,0.766,0.098,0.022,0.018
+CN10-3,1,21.043,1.235,0.742,0.717,0.793,0.07,0.019,0.023
+CN10-1,3,47.847,4.195,0.812,1.465,1.034,0.082,0.058,0.044
+CN10-2,3,56.541,4.596,1.193,1.452,1.257,0.096,0.012,0.081
+CN10-3,3,54.29,4.258,1.116,1.384,1.237,0.084,0.032,0.081
+CN10-1,5,66.582,7.176,1.449,2.096,1.137,0.284,0.053,0.027
+CN10-2,5,81.172,8.209,1.927,2.544,1.505,0.317,0.043,0.035
+CN10-3,5,70.528,6.831,1.629,2.202,1.285,0.257,0.016,0.029
+CN10-1,9,76.325,9.799,2.017,3.352,1.45,0.439,0.055,0.042
+CN10-2,9,98.902,11.816,2.675,4.346,1.888,0.579,0.043,0.054
+CN10-3,9,100.199,11.236,2.836,4.164,1.888,0.519,0.047,0.047
+CN10-1,11,82.171,10.703,2.485,3.976,1.625,0.591,0.057,0.08
+CN10-2,11,94.162,11.751,2.719,4.598,1.963,0.649,0.044,0.088
+CN10-3,11,89.181,10.756,2.493,4.137,1.835,0.561,0.034,0.064
+CN10-1,13,101.508,12.57,3.115,4.895,1.949,0.758,0.041,0.114
+CN10-2,13,108.601,13.698,3.31,5.505,2.072,0.814,0.047,0.146
+CN10-3,13,107.873,13.669,3.688,5.447,2.043,0.861,0.068,0.157
+CN10-1,16,100.422,14.564,3.045,5.623,2.105,0.862,0.063,0.126
+CN10-2,16,111.334,15.229,3.06,6.128,2.202,0.859,0.043,0.12
+CN10-3,16,112.261,15.044,3.423,6.106,2.271,0.858,0.046,0.121
+CN10-1,18,96.766,13.804,3.585,5.66,1.988,0.788,0.056,0.099
+CN10-2,18,104.135,14.584,3.38,6.173,2.157,0.827,0.047,0.108
+CN10-3,18,99.257,14.082,2.894,5.712,1.998,0.816,0.04,0.093
+CN10-1,20,90.968,13.733,3.266,5.697,1.974,0.804,0.055,0.119
+CN10-2,20,96.995,13.965,2.346,6.951,2.089,0.853,0.045,0.109
+CN10-3,20,107.952,14.742,3.668,6.546,2.227,0.973,0.042,0.108
+CN10-1,22,89.43,13.57,2.953,5.681,1.898,0.818,0.048,0.08
+CN10-2,22,104.397,14.584,2.946,7.454,2.237,0.918,0.041,0.096
+CN10-3,22,110.657,15.274,4.216,7.192,2.467,1.118,0.047,0.125
+NC-1,1,3.594,0.982,0.395,0.206,0.246,0.031,0.006,0.005
+NC-2,1,1.604,0.927,0.478,0.242,0.314,0.057,0.004,0.011
+NC-3,1,1.503,0.867,0.416,0.236,0.295,0.033,0.003,0.01
+NC-1,3,7.095,1.355,0.413,0.37,0.441,0.086,0.049,0.038
+NC-2,3,7.441,1.353,0.574,0.41,0.503,0.091,0.05,0.05
+NC-3,3,7.011,1.446,0.555,0.417,0.563,0.1,0.057,0.034
+NC-1,5,7.676,1.341,0.68,0.373,0.539,0.075,0.024,0.014
+NC-2,5,10.885,1.931,0.872,0.522,0.763,0.11,0.043,0.019
+NC-3,5,8.89,1.558,0.783,0.426,0.641,0.087,0.037,0.015
+NC-1,9,20.134,3.602,1.405,0.839,1.218,0.231,0.044,0.061
+NC-2,9,19.093,3.479,1.497,0.885,1.112,0.176,0.033,0.032
+NC-3,9,20.491,3.837,1.625,0.875,1.492,0.242,0.051,0.041
+NC-1,11,19.002,3.324,1.56,0.785,1.366,0.201,0.043,0.042
+NC-2,11,16.193,2.783,1.321,0.633,1.205,0.152,0.033,0.033
+NC-3,11,11.179,2.143,1.079,0.493,0.951,0.11,0.025,0.022
+NC-1,13,20.805,3.555,1.863,0.816,1.394,0.217,0.044,0.07
+NC-2,13,20.702,3.572,1.915,0.788,1.446,0.208,0.044,0.07
+NC-3,13,20.187,3.34,1.684,0.638,1.364,0.176,0.034,0.063
+NC-1,16,23.457,4.129,1.754,0.891,1.542,0.208,0.039,0.052
+NC-2,16,23.79,4.419,2.033,0.886,1.695,0.238,0.048,0.076
+NC-3,16,19.692,4.146,1.901,0.682,1.449,0.174,0.045,0.057
+NC-1,18,22.499,3.988,1.543,0.798,1.364,0.162,0.031,0.051
+NC-2,18,26.354,4.468,1.776,0.856,1.568,0.19,0.041,0.069
+NC-3,18,18.685,3.869,1.478,0.6,1.202,0.127,0.033,0.032
+NC-1,20,20.639,3.968,1.626,0.815,1.363,0.167,0.034,0.05
+NC-2,20,17.364,3.724,1.484,0.656,1.222,0.136,0.03,0.027
+NC-3,20,20.528,4.211,1.55,0.635,1.312,0.124,0.041,0.033
+NC-1,22,21.128,3.982,1.86,0.805,1.465,0.22,0.038,0.037
+NC-2,22,24.633,4.165,1.706,0.738,1.524,0.155,0.045,0.39
+NC-3,22,29.288,4.129,1.575,0.639,1.458,0.133,0.052,0.025
diff --git a/omics/kelly/notes.txt b/omics/kelly/notes.txt
new file mode 100644
index 0000000..d7dfd3f
--- /dev/null
+++ b/omics/kelly/notes.txt
@@ -0,0 +1,20 @@
+Notes:																	
+Each sample has three replicates																	
+Concentrations are listed here in mM (millimolar, or "moles / L *  1000 millimoles / 1 moles")																	
+CN samples have straw biomass added to AD vials																	
+NC samples have had water added to AD vials																	
+														
+To calculate from this data		Change in VFA with time	
+		Recalculate the data into grams per liter, and change in VFA g/l with time
+		Calculate the percent representation of each VFA, by mM and by weight				
+
+Data for mM to g/l calculations		
+
+MW (g/mol)	
+Acetate	Propanoate	Isobutyrate	Butyrate	Isopentanoate	Pentanoate	Isohexanoate	Hexanoate
+60.05	  74.08	      88.11	      88.11	    102.13	      102.13	    116.1583	    116.1583
+
+													
+Useful graphs would include 1) mM or mg/l versus time for VFAs, 2) Delta mg/l versus time for each VFA															
+																	
+																	
diff --git a/omics/kelly/workshop.qmd b/omics/kelly/workshop.qmd
new file mode 100644
index 0000000..948b559
--- /dev/null
+++ b/omics/kelly/workshop.qmd
@@ -0,0 +1,463 @@
+---
+title: "Kelly"
+subtitle: "VFAs"
+author: "Emma Rand"
+toc: true
+toc-depth: 4
+toc-location: right
+execute:
+  echo: true
+  include: true
+  error: true
+bibliography: ../../references.bib
+editor: 
+  markdown: 
+    wrap: 72
+---
+
+# Introduction
+
+## Overview
+
+VFAs from AD vials
+
+-   Two treatments: straw (CN10) and water (NC)
+
+-   10 time points: 1, 3, 5, 9, 11, 13, 16, 18, 20, 22
+
+-   three replicates per treatment per time point
+
+-   2 x 10 x 3 = 60 samples
+
+-   8 VFA with concentration in mM (millimolar): acetate, propanoate, isobutyrate, butyrate, isopentanoate, pentanoate, isohexanoate, hexanoate
+
+
+To calculate from this data
+
+-   Recalculate the data into grams per litre
+    -   convert to molar: 1 millimolar to molar = 0.001 molar
+    -   multiply by the molecular weight of each VFA
+-   Calculate *Change* in VFA g/l with time
+-   Calculate the percent representation of each VFA, by mM and by weight	
+
+## Data files
+
+-   8 VFA in mM for 60 samples [vfa.csv](data-raw/vfa.csv)
+
+-   Molecular weights for each VFA in grams per mole [mol_wt.txt](data-raw/mol_wt.txt)
+
+
+## Getting started
+
+## Set up a Project
+
+🎬 Start RStudio from the Start menu
+
+🎬 Make an RStudio project. Be deliberate about where you create it so
+that it is a good place for you
+
+🎬 Use the Files pane to make new folders for the data. I suggest
+`data-raw` and `data-processed`
+
+🎬 Make a new script called `analysis.R` to carry out the rest of the
+work.
+
+
+🎬 Load `tidyverse` [@tidyverse] for importing, summarising, plotting
+and filtering.
+
+```{r}
+library(tidyverse)
+```
+
+## Examine the data 
+
+🎬 Save the files to `data-raw`. Open them and examine them. You may want to use Excel for the csv file.
+
+🎬 Answer the following questions:
+
+-   What is in the rows and columns of each file?
+-   How many rows and columns are there in each file? 
+-   How are the data organised ? 
+
+
+## Import
+
+
+🎬 Import
+
+```{r}
+vfa_cummul <- read_csv("data-raw/vfa.csv") |> janitor::clean_names() # tidy the headers, e.g. "Time (day)" becomes time_day
+```
+
+
+🎬 Split treatment and replicate to separate columns so there is a treatment column:
+
+```{r}
+vfa_cummul <- vfa_cummul |> 
+  separate(col = sample_replicate, # values look like "CN10-1" or "NC-3"
+           into = c("treatment", "replicate"), 
+           sep = "-",
+           remove = FALSE) # keep sample_replicate; it is used for grouping later
+```
+
+The provided data is cumulative/absolute. We need to calculate the change in VFA with time. There is a function, `lag()`, that will help us do this. It returns the previous value, which we then subtract from the current value. The first time point has no previous value so its change will be `NA`. We need to do that separately for each `sample_replicate` so we need to group by `sample_replicate` first. We also need to make sure the data is in the right order so we will arrange by `sample_replicate` and `time_day`.
+
+
+🎬 Create dataframe for the change in VFA
+```{r}
+# Change in each VFA since the previous time point, per replicate.
+# lag() returns the previous row's value within each sample_replicate
+# group, so the first time point of every replicate has no predecessor
+# and its change is NA. across() applies the same subtraction to all
+# eight VFA columns (acetate to hexanoate) in one step.
+vfa_delta <- vfa_cummul |> 
+    group_by(sample_replicate)  |> 
+    arrange(sample_replicate, time_day) |>
+    mutate(across(acetate:hexanoate, 
+                  \(conc) conc - lag(conc))) |>
+    ungroup() # the grouping was only needed for lag()
+```
+
+Now we have two dataframes, one for the cumulative data and one for the change in VFA. 
+
+
+To make conversions from mM to g/l we need to do mM * 0.001 * MW. We will import the molecular weight data, pivot the VFA data to long format and join the molecular weight data to the VFA data. Then we can calculate the g/l. We will do this for both the cumulative and delta dataframes.
+
+
+🎬 import molecular weight data
+
+```{r}
+mol_wt <- read_table("data-raw/mol_wt.txt") |> # whitespace-delimited file with columns vfa and mw
+  mutate(vfa = tolower(vfa)) # lower case so the names match the cleaned VFA column names
+```
+
+🎬 Pivot the cumulative data to long format:
+
+
+```{r}
+#| echo: false
+# Reshape to long format: one row per sample, time point and VFA. The
+# VFA column names go into `vfa` and their values into `conc_mM`; the
+# four identifier columns before `acetate` are kept as they are.
+vfa_cummul <- vfa_cummul |> 
+  pivot_longer(cols = acetate:hexanoate,
+               names_to = "vfa",
+               values_to = "conc_mM") 
+```
+
+View `vfa_cummul` to check you understand what you have done.
+
+🎬 Join molecular weight to data and calculate g/l (mutate to convert to g/l: mM * 0.001 * MW):
+
+```{r}
+vfa_cummul <- vfa_cummul |> 
+  left_join(mol_wt, by = "vfa") |> # adds the mw column (g/mol) for each VFA
+  mutate(conc_g_l = conc_mM * 0.001 * mw) # mM * 0.001 = mol/l; mol/l * g/mol = g/l
+```
+
+
+View `vfa_cummul` to check you understand what you have done.
+
+
+🎬  Add a column which is the percent representation of each VFA for mM and g/l:
+```{r}
+vfa_cummul <- vfa_cummul |> 
+  group_by(sample_replicate, time_day) |> # one group per sample, holding its eight VFA rows
+  mutate(percent_conc_g_l = conc_g_l / sum(conc_g_l) * 100, # share of the sample's total by weight
+         percent_conc_mM = conc_mM / sum(conc_mM) * 100) # share of the sample's total by mM
+
+```
+
+
+
+
+
+🎬 Pivot the change data, `vfa_delta`, to long format: 
+
+
+```{r}
+#| echo: false
+# Reshape the change data to long format in the same way as
+# `vfa_cummul`: VFA column names go into `vfa` and the changes in
+# concentration into `conc_mM`; identifier columns are kept as they are.
+vfa_delta <- vfa_delta |> 
+  pivot_longer(cols = acetate:hexanoate,
+               names_to = "vfa",
+               values_to = "conc_mM") 
+```
+
+View `vfa_delta` to check it looks like `vfa_cummul`
+
+🎬 Join molecular weight to data and calculate g/l (mutate to convert to g/l: mM * 0.001 * MW):
+
+
+```{r}
+#| echo: false
+vfa_delta <- vfa_delta |> 
+  left_join(mol_wt, by = "vfa") |> # adds the mw column (g/mol) for each VFA
+  mutate(conc_g_l = conc_mM * 0.001 * mw) # change in mM * 0.001 * g/mol = change in g/l
+```
+
+## Graphs
+
+🎬 Make summary data for graphing
+
+```{r}
+vfa_cummul_summary <- vfa_cummul |> 
+  group_by(treatment, time_day, vfa) |> # the replicates form each group
+  summarise(mean_g_l = mean(conc_g_l),
+            se_g_l = sd(conc_g_l)/sqrt(n()), # standard error = sd / sqrt(group size)
+            mean_mM = mean(conc_mM),
+            se_mM = sd(conc_mM)/sqrt(n()),
+            .groups = "drop") # ungrouped result, without the regrouping message
+```
+
+```{r}
+vfa_delta_summary <- vfa_delta |> 
+  group_by(treatment, time_day, vfa) |> # the replicates form each group
+  summarise(mean_g_l = mean(conc_g_l), # NA at the first time point, which has no previous value
+            se_g_l = sd(conc_g_l)/sqrt(n()), # standard error = sd / sqrt(group size)
+            mean_mM = mean(conc_mM),
+            se_mM = sd(conc_mM)/sqrt(n()),
+            .groups = "drop") # ungrouped result, without the regrouping message
+```
+
+🎬 Graph the cumulative data, grams per litre:
+```{r}
+
+vfa_cummul_summary |> 
+  ggplot(aes(x = time_day, colour = vfa)) +
+  geom_line(aes(y = mean_g_l), 
+            linewidth = 1) +
+  geom_errorbar(aes(ymin = mean_g_l - se_g_l, # mean +/- 1 standard error
+                    ymax = mean_g_l + se_g_l),
+                width = 0.5, 
+                show.legend = FALSE, # the legend is drawn from the lines only
+                linewidth = 1) +
+  scale_colour_viridis_d(name = NULL) +
+  scale_x_continuous(name = "Time (days)") +
+  scale_y_continuous(name = "Mean VFA concentration (g/l)") +
+  theme_bw() +
+  facet_wrap(~treatment) + # one panel per treatment
+  theme(strip.background = element_blank())
+
+
+
+```
+
+🎬 Graph the change data, grams per litre:
+
+```{r}
+
+vfa_delta_summary |> 
+  ggplot(aes(x = time_day, colour = vfa)) +
+  geom_line(aes(y = mean_g_l), 
+            linewidth = 1) +
+  geom_errorbar(aes(ymin = mean_g_l - se_g_l, # mean +/- 1 standard error
+                    ymax = mean_g_l + se_g_l),
+                width = 0.5, 
+                show.legend = FALSE, # the legend is drawn from the lines only
+                linewidth = 1) +
+  scale_colour_viridis_d(name = NULL) +
+  scale_x_continuous(name = "Time (days)") +
+  scale_y_continuous(name = "Mean change in VFA concentration (g/l)") +
+  theme_bw() +
+  facet_wrap(~treatment) + # one panel per treatment
+  theme(strip.background = element_blank())
+
+
+
+```
+
+
+
+🎬 Graph the mean percent representation of each VFA g/l. Note `geom_col()` will plot proportion if we set `position = "fill"`:
+
+
+```{r}
+vfa_cummul_summary |> 
+  ggplot(aes(x = time_day, y = mean_g_l, fill = vfa)) +
+  geom_col(position = "fill") + # stack the VFAs and rescale each bar to total 1
+  scale_fill_viridis_d(name = NULL) +
+  scale_x_continuous(name = "Time (days)") +
+  scale_y_continuous(name = "Mean Proportion VFA") +
+  theme_bw() +
+  facet_wrap(~treatment) + # one panel per treatment
+  theme(strip.background = element_blank())
+```
+
+
+
+## View the relationship between samples using PCA
+
+We have 8 VFAs in our dataset. PCA will allow us to plot our
+samples in the "VFA" space so we can see if treatments, time or replicate cluster.
+
+However, PCA expects a matrix with samples in rows and VFA, the variables, in columns. We will need to select the columns we need and pivot wider. Then convert to a matrix.
+
+🎬 Select the columns needed and pivot to wide format, then convert to a matrix:
+
+```{r}
+vfa_cummul_pca <- vfa_cummul |> 
+  select(sample_replicate, 
+         treatment, 
+         replicate, 
+         time_day, 
+         vfa, 
+         conc_g_l) |> # leave out conc_mM, mw and the percent columns
+  pivot_wider(names_from = vfa, # one column per VFA...
+              values_from = conc_g_l) # ...holding its concentration in g/l
+  
+```
+```{r}
+# Build the matrix for PCA: drop the four identifier columns so that
+# only the VFA concentration columns remain.
+mat <- vfa_cummul_pca |> 
+  ungroup() |> # drop any grouping so select() can remove the identifier columns
+  select(-c(sample_replicate, treatment, 
+            replicate, time_day)) |> 
+  as.matrix()
+
+```
+
+
+🎬 Perform PCA on the matrix:
+
+
+```{r}
+pca <- mat |>
+  prcomp(scale. = TRUE, # scale each VFA to unit variance
+         rank. = 4) # keep only the first four principal components
+```
+
+
+The `scale.` argument tells `prcomp()` to scale the data to have a mean
+of 0 and a standard deviation of 1. 
+The `rank.` argument tells `prcomp()` to only calculate the first 4
+principal components. This is useful for visualisation as we can only
+plot in 2 or 3 dimensions. We can see the results of the PCA by viewing
+the `summary()` of the `pca` object.
+
+```{r}
+summary(pca)
+```
+
+The Proportion of Variance tells us how much of the variance is
+explained by each component. We can see that the first component
+explains 0.7798 of the variance, the second 0.1018, and the third
+0.07597.  Together the first three components explain nearly 96% of the
+total variance in the data. Plotting PC1 against PC2 will capture about
+88% of the variance which is likely much better than we would get
+plotting any two VFA against each other. To plot the PC1 against PC2
+we will need to extract the PC1 and PC2 scores from the `pca` object and
+add labels for the samples.
+
+
+
+🎬 Create a dataframe of the PC1 and PC2 scores which are in `pca$x` and
+add the sample information from vfa_cummul_pca:
+
+```{r}
+pca_labelled <- data.frame(pca$x, # PC scores, one row per sample
+                           sample_replicate = vfa_cummul_pca$sample_replicate,
+                           treatment = vfa_cummul_pca$treatment,
+                           replicate = vfa_cummul_pca$replicate,
+                           time_day = vfa_cummul_pca$time_day) # mat was built from vfa_cummul_pca without reordering rows
+```
+
+The dataframe should look like this:
+
+```{r}
+#| echo: false
+knitr::kable(pca_labelled)
+```
+
+🎬 Plot PC1 against PC2 and colour by time and shape by
+treatment:
+
+```{r}
+pca_labelled |> 
+  ggplot(aes(x = PC1, y = PC2, 
+             colour = factor(time_day), # factor() gives one discrete colour per time point
+             shape = treatment)) +
+  geom_point(size = 3) +
+  scale_colour_viridis_d(end = 0.95, begin = 0.15,
+                         name = "Time") +
+  scale_shape_manual(values = c(17, 19), # 17 = filled triangle, 19 = filled circle
+                     name = NULL) +
+  theme_classic()
+
+```
+🎬 Plot PC1 against PC2 and colour by time and facet by
+treatment:
+
+```{r}
+pca_labelled |> 
+  ggplot(aes(x = PC1, y = PC2, colour = factor(time_day))) + # factor() gives one discrete colour per time point
+  geom_point(size = 3) +
+  scale_colour_viridis_d(end = 0.95, begin = 0.15,
+                         name = "Time") +
+  facet_wrap(~treatment, ncol = 1) + # one panel per treatment, stacked vertically
+  theme_classic()
+
+```
+
+Replicates are similar at the same time and treatment, especially early, as we might expect. PC1 is essentially an axis of time.
+
+
+## Visualise the VFA concentration using a heatmap
+
+We are going to create an interactive heatmap with the **`heatmaply`** [@heatmaply] package. **`heatmaply`** takes a matrix as input so we  can use `mat`
+
+🎬 Set the rownames to the sample id which is a combination of `sample_replicate` and `time_day`:
+
+```{r}
+rownames(mat) <- paste(vfa_cummul_pca$sample_replicate, # label each row "<sample_replicate>.<time_day>"
+                       vfa_cummul_pca$time_day, sep = ".")
+
+```
+
+You might want to view the matrix by clicking on it in the environment pane. 
+
+
+🎬 Load the **`heatmaply`** package:
+```{r}
+library(heatmaply)
+```
+
+We need to tell the clustering algorithm how many clusters to create. We will set the number of clusters for the treatments to be 2 and the number of clusters for the vfa to be the same since it makes sense to see what clusters of VFAs correlate with the treatments.
+
+🎬 Set the number of clusters for the treatments and vfa:
+
+```{r}
+n_treatment_clusters <- 2
+n_vfa_clusters <- 2
+```
+
+
+🎬 Create the heatmap:
+```{r}
+#| fig-height: 10
+heatmaply(mat, 
+          scale = "column", # scale within each column, i.e. within each VFA
+          k_col = n_vfa_clusters, # number of clusters to mark among the columns (VFAs)
+          k_row = n_treatment_clusters, # number of clusters to mark among the rows (samples)
+          fontsize_row = 7, fontsize_col = 10,
+          labCol = colnames(mat),
+          labRow = rownames(mat),
+          heatmap_layers = theme(axis.line = element_blank()))
+```
+
+The heatmap will open in the viewer pane (rather than the plot pane) because it is html. You can "Show in a new window" to see it in a larger format. You can also zoom in and out and pan around the heatmap and download it as a png. You might feel the colour bar is not adding much to the plot. You can remove it by setting `hide_colorbar = TRUE,` in the `heatmaply()` function. 
+
+One of the NC replicates at time = 22 is very different from the other replicates. 
+The CN10 treatments cluster together at high time points. CN10 samples are more similar to NC samples early on.
+Most of the VFAs behave similarly with highest values later in the experiment for CN10 but isohexanoate and hexanoate differ. The difference might be because isohexanoate is especially low in the NC replicates at time = 1 and hexanoate is especially high in the NC replicate 2 at time = 22
+
+
+
+Pages made with R [@R-core], Quarto [@allaire2022], `knitr` [@knitr],
+`kableExtra` [@kableExtra]
+
+# References
diff --git a/renv.lock b/renv.lock
index 7ef9ce6..1c5ea62 100644
--- a/renv.lock
+++ b/renv.lock
@@ -558,19 +558,6 @@
       ],
       "Hash": "7fba3f587b0f3cb3232d03d540dbf772"
     },
-    "V8": {
-      "Package": "V8",
-      "Version": "4.3.3",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "Rcpp",
-        "curl",
-        "jsonlite",
-        "utils"
-      ],
-      "Hash": "20d81ec18bde233d8cc3265761fe8c93"
-    },
     "XML": {
       "Package": "XML",
       "Version": "3.99-0.14",
@@ -831,21 +818,6 @@
       ],
       "Hash": "f61dbaec772ccd2e17705c1e872e9e7c"
     },
-    "cffr": {
-      "Package": "cffr",
-      "Version": "0.5.0",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "R",
-        "cli",
-        "desc",
-        "jsonlite",
-        "jsonvalidate",
-        "yaml"
-      ],
-      "Hash": "291741a84f9c4b2229321f440256e715"
-    },
     "cli": {
       "Package": "cli",
       "Version": "3.6.1",
@@ -1018,20 +990,6 @@
       ],
       "Hash": "043fafb791081fc553f29021bd0a9a01"
     },
-    "desc": {
-      "Package": "desc",
-      "Version": "1.4.2",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "R",
-        "R6",
-        "cli",
-        "rprojroot",
-        "utils"
-      ],
-      "Hash": "6b9602c7ebbe87101a9c8edb6e8b6d21"
-    },
     "digest": {
       "Package": "digest",
       "Version": "0.6.33",
@@ -1651,6 +1609,28 @@
       ],
       "Hash": "8954069286b4b2b0d023d1b288dce978"
     },
+    "janitor": {
+      "Package": "janitor",
+      "Version": "2.2.0",
+      "Source": "Repository",
+      "Repository": "RSPM",
+      "Requirements": [
+        "R",
+        "dplyr",
+        "hms",
+        "lifecycle",
+        "lubridate",
+        "magrittr",
+        "purrr",
+        "rlang",
+        "snakecase",
+        "stringi",
+        "stringr",
+        "tidyr",
+        "tidyselect"
+      ],
+      "Hash": "5baae149f1082f466df9d1442ba7aa65"
+    },
     "jquerylib": {
       "Package": "jquerylib",
       "Version": "0.1.4",
@@ -1671,16 +1651,6 @@
       ],
       "Hash": "266a20443ca13c65688b2116d5220f76"
     },
-    "jsonvalidate": {
-      "Package": "jsonvalidate",
-      "Version": "1.3.2",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "V8"
-      ],
-      "Hash": "cdc2843ef7f44f157198bb99aea7552d"
-    },
     "knitr": {
       "Package": "knitr",
       "Version": "1.44",
@@ -2276,16 +2246,6 @@
       ],
       "Hash": "d65e35823c817f09f4de424fcdfa812a"
     },
-    "rprojroot": {
-      "Package": "rprojroot",
-      "Version": "2.0.4",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "R"
-      ],
-      "Hash": "4c8415e0ec1e29f3f4f6fc108bef0144"
-    },
     "rstudioapi": {
       "Package": "rstudioapi",
       "Version": "0.15.0",
@@ -2456,6 +2416,18 @@
       ],
       "Hash": "c956d93f6768a9789edbc13072b70c78"
     },
+    "snakecase": {
+      "Package": "snakecase",
+      "Version": "0.11.1",
+      "Source": "Repository",
+      "Repository": "RSPM",
+      "Requirements": [
+        "R",
+        "stringi",
+        "stringr"
+      ],
+      "Hash": "58767e44739b76965332e8a4fe3f91f1"
+    },
     "snow": {
       "Package": "snow",
       "Version": "0.4-4",