diff --git a/omics/kelly/Rplot001.jpg b/omics/kelly/Rplot001.jpg deleted file mode 100644 index fd0b397..0000000 Binary files a/omics/kelly/Rplot001.jpg and /dev/null differ diff --git a/omics/kelly/data-raw/VFAdata for BIO00088H R_studio.csv b/omics/kelly/data-raw/VFAdata for BIO00088H R_studio.csv new file mode 100644 index 0000000..f806cd9 --- /dev/null +++ b/omics/kelly/data-raw/VFAdata for BIO00088H R_studio.csv @@ -0,0 +1,76 @@ +Notes:,,,,,,,,,,,,,,,,, +Each sample has three replicates,,,,,,,,,,,,,,,,, +"Concentrations are listed here in mM (millimolar, or ""moles / L * 1000 millimoles / 1 moles"")",,,,,,,,,,,,,,,,, +CN samples have straw biomass added to AD vials,,,,,,,,,,,,,,,,, +NC samples have had water added to AD vials,,,,,,,,,,,,,,,,, +,,,,,,,,,Data for mM to g/l calculations,,,,,,,, +To calculate from this data,,Change in VFA with time,,,,,,,MW (g/mol),60.05,74.08,88.11,88.11,102.13,102.13,116.1583,116.1583 +,,"Recalculate the data into grams per liter, and change in VFA g/l with time",,,,,,,,Acetate,Propanoate,Isobutyrate,Butyrate,Isopentanoate,Pentanoate,Isohexanoate,Hexanoate +,,"Calculate the percent representation of each VFA, by mM and by weight",,,,,,,,,,,,,,, +,,"Useful graphs would include 1) mM or mg/l versus time for VFAs, 2) Delta mg/l versus time for each VFA",,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,, +Sample - Replicate,Time (day),Acetate,Propanoate,Isobutyrate,Butyrate,Isopentanoate,Pentanoate,Isohexanoate,Hexanoate,,,,,,,, +CN10-1,0,0,0,0,0,0,0,0,0,,,,,,,, +CN10-2,0,0,0,0,0,0,0,0,0,,,,,,,, +CN10-3,0,0,0,0,0,0,0,0,0,,,,,,,, +CN10-1,1,17.239,1.126,0.733,0.538,0.52,0.076,0.025,0.007,,,,,,,, +CN10-2,1,19.805,1.153,0.736,0.731,0.766,0.098,0.022,0.018,,,,,,,, +CN10-3,1,21.043,1.235,0.742,0.717,0.793,0.07,0.019,0.023,,,,,,,, +CN10-1,3,47.847,4.195,0.812,1.465,1.034,0.082,0.058,0.044,,,,,,,, +CN10-2,3,56.541,4.596,1.193,1.452,1.257,0.096,0.012,0.081,,,,,,,, +CN10-3,3,54.29,4.258,1.116,1.384,1.237,0.084,0.032,0.081,,,,,,,, +CN10-1,5,66.582,7.176,1.449,2.096,1.137,0.284,0.053,0.027,,,,,,,, +CN10-2,5,81.172,8.209,1.927,2.544,1.505,0.317,0.043,0.035,,,,,,,, +CN10-3,5,70.528,6.831,1.629,2.202,1.285,0.257,0.016,0.029,,,,,,,, +CN10-1,9,76.325,9.799,2.017,3.352,1.45,0.439,0.055,0.042,,,,,,,, +CN10-2,9,98.902,11.816,2.675,4.346,1.888,0.579,0.043,0.054,,,,,,,, +CN10-3,9,100.199,11.236,2.836,4.164,1.888,0.519,0.047,0.047,,,,,,,, +CN10-1,11,82.171,10.703,2.485,3.976,1.625,0.591,0.057,0.08,,,,,,,, +CN10-2,11,94.162,11.751,2.719,4.598,1.963,0.649,0.044,0.088,,,,,,,, +CN10-3,11,89.181,10.756,2.493,4.137,1.835,0.561,0.034,0.064,,,,,,,, +CN10-1,13,101.508,12.57,3.115,4.895,1.949,0.758,0.041,0.114,,,,,,,, +CN10-2,13,108.601,13.698,3.31,5.505,2.072,0.814,0.047,0.146,,,,,,,, +CN10-3,13,107.873,13.669,3.688,5.447,2.043,0.861,0.068,0.157,,,,,,,, +CN10-1,16,100.422,14.564,3.045,5.623,2.105,0.862,0.063,0.126,,,,,,,, +CN10-2,16,111.334,15.229,3.06,6.128,2.202,0.859,0.043,0.12,,,,,,,, +CN10-3,16,112.261,15.044,3.423,6.106,2.271,0.858,0.046,0.121,,,,,,,, +CN10-1,18,96.766,13.804,3.585,5.66,1.988,0.788,0.056,0.099,,,,,,,, +CN10-2,18,104.135,14.584,3.38,6.173,2.157,0.827,0.047,0.108,,,,,,,, +CN10-3,18,99.257,14.082,2.894,5.712,1.998,0.816,0.04,0.093,,,,,,,, +CN10-1,20,90.968,13.733,3.266,5.697,1.974,0.804,0.055,0.119,,,,,,,, +CN10-2,20,96.995,13.965,2.346,6.951,2.089,0.853,0.045,0.109,,,,,,,, +CN10-3,20,107.952,14.742,3.668,6.546,2.227,0.973,0.042,0.108,,,,,,,, +CN10-1,22,89.43,13.57,2.953,5.681,1.898,0.818,0.048,0.08,,,,,,,, +CN10-2,22,104.397,14.584,2.946,7.454,2.237,0.918,0.041,0.096,,,,,,,, +CN10-3,22,110.657,15.274,4.216,7.192,2.467,1.118,0.047,0.125,,,,,,,, +NC1,1,3.594,0.982,0.395,0.206,0.246,0.031,0.006,0.005,,,,,,,, +NC2,1,1.604,0.927,0.478,0.242,0.314,0.057,0.004,0.011,,,,,,,, +NC3,1,1.503,0.867,0.416,0.236,0.295,0.033,0.003,0.01,,,,,,,, +NC1,3,7.095,1.355,0.413,0.37,0.441,0.086,0.049,0.038,,,,,,,, +NC2,3,7.441,1.353,0.574,0.41,0.503,0.091,0.05,0.05,,,,,,,, +NC3,3,7.011,1.446,0.555,0.417,0.563,0.1,0.057,0.034,,,,,,,, +NC1,5,7.676,1.341,0.68,0.373,0.539,0.075,0.024,0.014,,,,,,,, +NC2,5,10.885,1.931,0.872,0.522,0.763,0.11,0.043,0.019,,,,,,,, +NC3,5,8.89,1.558,0.783,0.426,0.641,0.087,0.037,0.015,,,,,,,, +NC1,9,20.134,3.602,1.405,0.839,1.218,0.231,0.044,0.061,,,,,,,, +NC2,9,19.093,3.479,1.497,0.885,1.112,0.176,0.033,0.032,,,,,,,, +NC3,9,20.491,3.837,1.625,0.875,1.492,0.242,0.051,0.041,,,,,,,, +NC1,11,19.002,3.324,1.56,0.785,1.366,0.201,0.043,0.042,,,,,,,, +NC2,11,16.193,2.783,1.321,0.633,1.205,0.152,0.033,0.033,,,,,,,, +NC3,11,11.179,2.143,1.079,0.493,0.951,0.11,0.025,0.022,,,,,,,, +NC1,13,20.805,3.555,1.863,0.816,1.394,0.217,0.044,0.07,,,,,,,, +NC2,13,20.702,3.572,1.915,0.788,1.446,0.208,0.044,0.07,,,,,,,, +NC3,13,20.187,3.34,1.684,0.638,1.364,0.176,0.034,0.063,,,,,,,, +NC1,16,23.457,4.129,1.754,0.891,1.542,0.208,0.039,0.052,,,,,,,, +NC2,16,23.79,4.419,2.033,0.886,1.695,0.238,0.048,0.076,,,,,,,, +NC3,16,19.692,4.146,1.901,0.682,1.449,0.174,0.045,0.057,,,,,,,, +NC1,18,22.499,3.988,1.543,0.798,1.364,0.162,0.031,0.051,,,,,,,, +NC2,18,26.354,4.468,1.776,0.856,1.568,0.19,0.041,0.069,,,,,,,, +NC3,18,18.685,3.869,1.478,0.6,1.202,0.127,0.033,0.032,,,,,,,, +NC1,20,20.639,3.968,1.626,0.815,1.363,0.167,0.034,0.05,,,,,,,, +NC2,20,17.364,3.724,1.484,0.656,1.222,0.136,0.03,0.027,,,,,,,, +NC3,20,20.528,4.211,1.55,0.635,1.312,0.124,0.041,0.033,,,,,,,, +NC1,22,21.128,3.982,1.86,0.805,1.465,0.22,0.038,0.037,,,,,,,, +NC2,22,24.633,4.165,1.706,0.738,1.524,0.155,0.045,0.39,,,,,,,, +NC3,22,29.288,4.129,1.575,0.639,1.458,0.133,0.052,0.025,,,,,,,, diff --git a/omics/kelly/notes.txt b/omics/kelly/notes.txt index d7dfd3f..df47fb0 100644 --- a/omics/kelly/notes.txt +++ b/omics/kelly/notes.txt @@ -15,6 +15,17 @@ Acetate Propanoate Isobutyrate Butyrate Isopentanoate Pentanoate Isohexanoate He 60.05 74.08 88.11 88.11 102.13 102.13 116.1583 116.1583 -Useful graphs would include 1) mM or mg/l versus time for VFAs, 2) Delta mg/l versus time for each VFA - - +Useful graphs would include 1) mM or mg/l versus time for VFAs, 2) Delta mg/l versus time for each VFA + + +They should also be able to do a simple flux measurement: +change in VFA concentration over a period of time, divided by weight or volume of material. +In this case it might be equal to == +Delta(Acetate at 3 days - Acetate at 1 day)/ Delta (3days - 1day)/ 50 mls sludge. This would provide a final flux with the units of mg acetate per ml sludge per day. Let me know if this isn't clear. +-----So I need to add the lag in days. Check the 50mls bit. Is that going to be a constant in the replicate vial? or is it going to be different? . + + +Perhaps more importantly they should be able to graph and extract the reaction rate, assuming a first order chemical/biological reaction and an exponential falloff rate. I found this as a starting point (https://martinlab.chem.umass.edu/r-fitting-data/) , but I assume Emma has something much more effective already in the pipeline. + + + diff --git a/omics/kelly/temp/example.R b/omics/kelly/temp/example.R new file mode 100644 index 0000000..46db641 --- /dev/null +++ b/omics/kelly/temp/example.R @@ -0,0 +1,3 @@ +library(tidyverse) +example <- read_csv("omics/kelly/temp/example.csv") |> janitor::clean_names() + diff --git a/omics/kelly/temp/example.csv b/omics/kelly/temp/example.csv new file mode 100644 index 0000000..5327aab --- /dev/null +++ b/omics/kelly/temp/example.csv @@ -0,0 +1,42 @@ +"","t","fluorI" +"1",0,10.1385967718684 +"2",1,8.35533475533268 +"3",2,6.76788471742473 +"4",3,5.36280912133003 +"5",4,4.48247204833858 +"6",5,4.18185893519706 +"7",6,3.34896480340668 +"8",7,2.71315594667858 +"9",8,2.62614956633054 +"10",9,2.06087018509515 +"11",10,1.86074194679208 +"12",11,1.97873314323764 +"13",12,1.83378700440039 +"14",13,1.51439267158402 +"15",14,1.30714846254938 +"16",15,0.741412075403802 +"17",16,0.631299882299171 +"18",17,1.02965656564605 +"19",18,0.902298811540632 +"20",19,0.558746438644141 +"21",20,0.400682660920331 +"22",21,0.802962302399545 +"23",22,0.515497522312792 +"24",23,0.905477723273229 +"25",24,0.344178788232269 +"26",25,0.103340446000128 +"27",26,0.879290356441754 +"28",27,0.577592309885709 +"29",28,0.493046226597491 +"30",29,0.366163886781245 +"31",30,0.125189162368687 +"32",31,0.466644952642418 +"33",32,-0.095514224245819 +"34",33,0.177120019463404 +"35",34,0.20511866185474 +"36",35,0.312909299607934 +"37",36,0.184261436760406 +"38",37,0.151930611035154 +"39",38,0.216249309890573 +"40",39,-0.0782341680185992 +"41",40,0.32520930978664 diff --git a/omics/kelly/workshop.qmd b/omics/kelly/workshop.qmd index 948b559..7133508 100644 --- a/omics/kelly/workshop.qmd +++ b/omics/kelly/workshop.qmd @@ -1,6 +1,6 @@ --- -title: "Kelly" -subtitle: "VFAs" +title: "Workflow for VFA analysis" +subtitle: "Kelly's project" author: "Emma Rand" toc: true toc-depth: 4 @@ -17,40 +17,78 @@ editor: # Introduction -## Overview - -VFAs from AD vials - -- Two treatments: straw (CN10) and water (NC) - -- 10 time points: 1, 3, 5, 9, 11, 13, 16, 18, 20, 22 - -- three replicates per treatment per time point - -- 2 x 10 x 3 = 60 groups - -- 8 VFA with concentration in mM (millimolar): acetate, propanoate, isobutyrate, butyrate, isopentanoate, pentanoate, isohexanoate, hexanoate - - -To calculate from this data - -- Recalculate the data into grams per litre - - convert to molar: 1 millimolar to molar = 0.001 molar - - multiply by the molecular weight of each VFA -- Calculate *Change* in VFA g/l with time -- Calculate the percent representation of each VFA, by mM and by weight - -## Data files - -- 8 VFA in mM for 60 samples [vfa.csv](data-raw/vfa.csv) - -- Molecular weights for each VFA in grams per mole [mol_wt.txt](data-raw/mol_wt.txt) - +I have some data and information from Kelly. I have interpreted it and +written some code to do the calculations. + +However, Kelly hasn't had a chance to look at it yet so I am providing +the exact information and data he supplied along with my suggested +workflow based on my interpretation of the data and info. + +## Exact information supplied by Kelly + +*The [file is a CSV +file](data-raw/VFAdata%20for%20BIO00088H%20R_studio.csv), with some +notes on top and the data in the following order, post notes and +headers. Please note that all chemical data is in millimolar. There are +62 rows of actual data.* + +*Sample Name -- Replicate, Time (days), Acetate, Propanoate, +Isobutyrate, Butyrate, Isopentanoate, Pentanoate, Isohexanoate, +Hexanoate* + +*The students should be able to transform the data from mM to mg/L, and +to g/L. To do this they only need to multiply the molecular weight of +the compound (listed in the notes in the file) by the concentration in +mM to get mg/L. Obviously to get g/L they will just divide by 1000. They +should be able to graph the VFA concentrations with time.* + +*They should also be able to do a simple flux measurement, which is the +change in VFA concentration over a period of time, divided by weight or +volume of material. In this case it might be equal to == Delta(Acetate +at 3 days - Acetate at 1 day)/Delta (3days - 1day)/50 mls sludge. This +would provide a final flux with the units of mg acetate per ml sludge +per day. Let me know if this isn't clear.* + +*Perhaps more importantly they should be able to graph and extract the +reaction rate, assuming a first order chemical/biological reaction and +an exponential falloff rate. I found this as a starting point +(https://martinlab.chem.umass.edu/r-fitting-data/) , but I assume Emma +has something much more effective already in the pipeline.* + +## Emma's Worklflow interpretation + +I created these two data files from the original. + +1. 8 VFA in mM for 60 samples [vfa.csv](data-raw/vfa.csv). There were + 63 rows of data in the original file. There were no time 0 for one + treatment and all values were zero for the other treatment so I + removed those. + - Two treatments: straw (CN10) and water (NC) + - 10 time points: 1, 3, 5, 9, 11, 13, 16, 18, 20, 22 + - three replicates per treatment per time point + - 2 x 10 x 3 = 60 groups + - 8 VFA with concentration in mM (millimolar): acetate, + propanoate, isobutyrate, butyrate, isopentanoate, pentanoate, + isohexanoate, hexanoate +2. Molecular weights for each VFA in grams per mole + [mol_wt.txt](data-raw/mol_wt.txt) VFAs from AD vials + +We need to: + +1. Calculate *Change* in VFA g/l with time +2. Recalculate the data into grams per litre - convert to molar: 1 + millimolar to molar = 0.001 molar - multiply by the molecular weight + of each VFA +3. Calculate the percent representation of each VFA, by mM and by + weight +4. Calculate the flux (change in VFA concentration over a period of + time, divided by weight or volume of material) of each VFA, by mM + and by weight +5. Graph and extract the reaction rate, assuming a first order + chemical/biological reaction and an exponential falloff rate ## Getting started -## Set up a Project - 🎬 Start RStudio from the Start menu 🎬 Make an RStudio project. Be deliberate about where you create it so @@ -62,7 +100,6 @@ that it is a good place for you 🎬 Make a new script called `analysis.R` to carry out the rest of the work. - 🎬 Load `tidyverse` [@tidyverse] for importing, summarising, plotting and filtering. @@ -70,28 +107,27 @@ and filtering. library(tidyverse) ``` -## Examine the data +## Examine the data -🎬 Save the files to `data-raw`. Open them and examine them. You may want to use Excel for the csv file. +🎬 Save the files to `data-raw`. Open them and examine them. You may +want to use Excel for the csv file. 🎬 Answer the following questions: - What is in the rows and columns of each file? -- How many rows and columns are there in each file? -- How are the data organised ? - +- How many rows and columns are there in each file? +- How are the data organised ? ## Import - 🎬 Import ```{r} vfa_cummul <- read_csv("data-raw/vfa.csv") |> janitor::clean_names() ``` - -🎬 Split treatment and replicate to separate columns so there is a treatment column: +🎬 Split treatment and replicate to separate columns so there is a +treatment column: ```{r} vfa_cummul <- vfa_cummul |> @@ -101,10 +137,18 @@ vfa_cummul <- vfa_cummul |> remove = FALSE) ``` -The provided data is cumulative/absolute. We need to calculate the change in VFA with time. There is a function, `lag()` that will help us do this. It will take the previous value and subtract it from the current value. We need to do that separately for each `sample_replicate` so we need to group by `sample_replicate` first. We also need to make sure the data is in the right order so we will arrange by `sample_replicate` and `time_day`. +The provided data is cumulative/absolute. We need to calculate the +change in VFA with time. There is a function, `lag()` that will help us +do this. It will take the previous value and subtract it from the +current value. We need to do that separately for each `sample_replicate` +so we need to group by `sample_replicate` first. We also need to make +sure the data is in the right order so we will arrange by +`sample_replicate` and `time_day`. +## 1. Calculate *Change* in VFA g/l with time 🎬 Create dataframe for the change in VFA + ```{r} vfa_delta <- vfa_cummul |> group_by(sample_replicate) |> @@ -119,11 +163,16 @@ vfa_delta <- vfa_cummul |> hexanoate = hexanoate - lag(hexanoate)) ``` -Now we have two dataframes, one for the cumulative data and one for the change in VFA. - +Now we have two dataframes, one for the cumulative data and one for the +change in VFA. -To make conversions from mM to g/l we need to do mM * 0.001 * MW. We will import the molecular weight data, pivot the VFA data to long format and join the molecular weight data to the VFA data. Then we can calculate the g/l. We will do this for both the cumulative and delta dataframes. +## 2. Recalculate the data into grams per litre +To make conversions from mM to g/l we need to do mM \* 0.001 \* MW. We +will import the molecular weight data, pivot the VFA data to long format +and join the molecular weight data to the VFA data. Then we can +calculate the g/l. We will do this for both the cumulative and delta +dataframes. 🎬 import molecular weight data @@ -134,7 +183,6 @@ mol_wt <- read_table("data-raw/mol_wt.txt") |> 🎬 Pivot the cumulative data to long format: - ```{r} #| echo: false vfa_cummul <- vfa_cummul |> @@ -148,7 +196,8 @@ vfa_cummul <- vfa_cummul |> View `vfa_cummul` to check you understand what you have done. -🎬 Join molecular weight to data and calculate g/l (mutate to convert to g/l * 0.001 * MW): +🎬 Join molecular weight to data and calculate g/l (mutate to convert to +g/l \* 0.001 \* MW): ```{r} vfa_cummul <- vfa_cummul |> @@ -156,25 +205,11 @@ vfa_cummul <- vfa_cummul |> mutate(conc_g_l = conc_mM * 0.001 * mw) ``` - View `vfa_cummul` to check you understand what you have done. +Repeat for the delta data. -🎬 Add a column which is the percent representation of each VFA for mM and g/l: -```{r} -vfa_cummul <- vfa_cummul |> - group_by(sample_replicate, time_day) |> - mutate(percent_conc_g_l = conc_g_l / sum(conc_g_l) * 100, - percent_conc_mM = conc_mM / sum(conc_mM) * 100) - -``` - - - - - -🎬 Pivot the change data, `delta_vfa` to long format: - +🎬 Pivot the change data, `delta_vfa` to long format: ```{r} #| echo: false @@ -189,8 +224,8 @@ vfa_delta <- vfa_delta |> View `vfa_delta` to check it looks like `vfa_cummul` -🎬 Join molecular weight to data and calculate g/l (mutate to convert to g/l * 0.001 * MW): - +🎬 Join molecular weight to data and calculate g/l (mutate to convert to +g/l \* 0.001 \* MW): ```{r} #| echo: false @@ -199,7 +234,22 @@ vfa_delta <- vfa_delta |> mutate(conc_g_l = conc_mM * 0.001 * mw) ``` -## Graphs +## 3. Calculate the percent representation of each VFA + +by mM and by weight + +🎬 Add a column which is the percent representation of each VFA for mM +and g/l: + +```{r} +vfa_cummul <- vfa_cummul |> + group_by(sample_replicate, time_day) |> + mutate(percent_conc_g_l = conc_g_l / sum(conc_g_l) * 100, + percent_conc_mM = conc_mM / sum(conc_mM) * 100) + +``` + +## Graphs for info so far 🎬 Make summary data for graphing @@ -224,6 +274,7 @@ vfa_delta_summary <- vfa_delta |> ``` 🎬 Graph the cumulative data, grams per litre: + ```{r} vfa_cummul_summary |> @@ -270,10 +321,8 @@ vfa_delta_summary |> ``` - - -🎬 Graph the mean percent representation of each VFA g/l. Note `geom_col()` will plot proportion if we set` position = "fill"` - +🎬 Graph the mean percent representation of each VFA g/l. Note +`geom_col()` will plot proportion if we set`position = "fill"` ```{r} vfa_cummul_summary |> @@ -287,16 +336,16 @@ vfa_cummul_summary |> theme(strip.background = element_blank()) ``` +### View the relationship between samples using PCA +We have 8 VFA in our dataset. PCA will allow us to plot our samples in +the "VFA" space so we can see if treatments, time or replicate cluster. -## View the relationship between samples using PCA - -We have 8 genes in our dataset. PCA will allow us to plot our -samples in the "VFA" space so we can see if treatments, time or replicate cluster. +However, PCA expects a matrix with samples in rows and VFA, the +variables, in columns. We will need to select the columns we need and +pivot wider. Then convert to a matrix. -However, PCA expects a matrix with samples in rows and VFA, the variables, in columns. We will need to select the columns we need and pivot wider. Then convert to a matrix. - -🎬 +🎬 ```{r} vfa_cummul_pca <- vfa_cummul |> @@ -310,6 +359,7 @@ vfa_cummul_pca <- vfa_cummul |> values_from = conc_g_l) ``` + ```{r} mat <- vfa_cummul_pca |> ungroup() |> @@ -321,23 +371,20 @@ mat <- vfa_cummul_pca |> ``` - 🎬 Perform PCA on the matrix: - ```{r} pca <- mat |> prcomp(scale. = TRUE, rank. = 4) ``` - The `scale.` argument tells `prcomp()` to scale the data to have a mean -of 0 and a standard deviation of 1. -The `rank.` argument tells `prcomp()` to only calculate the first 4 -principal components. This is useful for visualisation as we can only -plot in 2 or 3 dimensions. We can see the results of the PCA by viewing -the `summary()` of the `pca` object. +of 0 and a standard deviation of 1. The `rank.` argument tells +`prcomp()` to only calculate the first 4 principal components. This is +useful for visualisation as we can only plot in 2 or 3 dimensions. We +can see the results of the PCA by viewing the `summary()` of the `pca` +object. ```{r} summary(pca) @@ -346,14 +393,12 @@ summary(pca) The Proportion of Variance tells us how much of the variance is explained by each component. We can see that the first component explains 0.7798 of the variance, the second 0.1018, and the third -0.07597. Together the first three components explain nearly 96% of the +0.07597. Together the first three components explain nearly 96% of the total variance in the data. Plotting PC1 against PC2 will capture about 78% of the variance which is likely much better than we would get -plotting any two VFA against each other. To plot the PC1 against PC2 -we will need to extract the PC1 and PC2 score from the pca object and -add labels for the samples. - - +plotting any two VFA against each other. To plot the PC1 against PC2 we +will need to extract the PC1 and PC2 score from the pca object and add +labels for the samples. 🎬 Create a dataframe of the PC1 and PC2 scores which are in `pca$x` and add the sample information from vfa_cummul_pca: @@ -373,8 +418,7 @@ The dataframe should look like this: knitr::kable(pca_labelled) ``` -🎬 Plot PC1 against PC2 and colour by time and shape by -treatment: +🎬 Plot PC1 against PC2 and colour by time and shape by treatment: ```{r} pca_labelled |> @@ -387,10 +431,9 @@ pca_labelled |> scale_shape_manual(values = c(17, 19), name = NULL) + theme_classic() - ``` -🎬 Plot PC1 against PC2 and colour by time and facet -treatment: + +🎬 Plot PC1 against PC2 and colour by time and facet treatment: ```{r} pca_labelled |> @@ -400,33 +443,38 @@ pca_labelled |> name = "Time") + facet_wrap(~treatment, ncol = 1) + theme_classic() - ``` -replicates are similar at the same time and treatment especially early as we might expect. PC is essentially an axis of time. - +replicates are similar at the same time and treatment especially early +as we might expect. PC is essentially an axis of time. -## Visualise the VFA concentration using a heatmap +### Visualise the VFA concentration using a heatmap -We are going to create an interactive heatmap with the **`heatmaply`** [@heatmaply] package. **`heatmaply`** takes a matrix as input so we can use `mat` +We are going to create an interactive heatmap with the **`heatmaply`** +[@heatmaply] package. **`heatmaply`** takes a matrix as input so we can +use `mat` -🎬 Set the rownames to the sample id whihcih is combination of `sample_replicate` and `time_day`: +🎬 Set the rownames to the sample id whihcih is combination of +`sample_replicate` and `time_day`: ```{r} rownames(mat) <- interaction(vfa_cummul_pca$sample_replicate, vfa_cummul_pca$time_day) - ``` -You might want to view the matrix by clicking on it in the environment pane. - +You might want to view the matrix by clicking on it in the environment +pane. 🎬 Load the **`heatmaply`** package: + ```{r} library(heatmaply) ``` -We need to tell the clustering algorithm how many clusters to create. We will set the number of clusters for the treatments to be 2 and the number of clusters for the vfa to be the same since it makes sense to see what clusters of genes correlate with the treatments. +We need to tell the clustering algorithm how many clusters to create. We +will set the number of clusters for the treatments to be 2 and the +number of clusters for the vfa to be the same since it makes sense to +see what clusters of genes correlate with the treatments. 🎬 Set the number of clusters for the treatments and vfa: @@ -435,8 +483,8 @@ n_treatment_clusters <- 2 n_vfa_clusters <- 2 ``` - 🎬 Create the heatmap: + ```{r} #| fig-height: 10 heatmaply(mat, @@ -449,13 +497,41 @@ heatmaply(mat, heatmap_layers = theme(axis.line = element_blank())) ``` -The heatmap will open in the viewer pane (rather than the plot pane) because it is html. You can "Show in a new window" to see it in a larger format. You can also zoom in and out and pan around the heatmap and download it as a png. You might feel the colour bars is not adding much to the plot. You can remove it by setting `hide_colorbar = TRUE,` in the `heatmaply()` function. - -One of the NC replicates at time = 22 is very different from the other replicates. -The CN10 treatments cluster together at high time points. CN10 samples are more similar to NC samples early on. -Most of the VFAs behave similarly with highest values later in the experiment for CN10 but isohexanoate and hexanoate differ. The difference might be because isohexanoate is especially low in the NC replicates at time = 1 and hexanoate is especially high in the NC replicate 2 at time = 22 - - +The heatmap will open in the viewer pane (rather than the plot pane) +because it is html. You can "Show in a new window" to see it in a larger +format. You can also zoom in and out and pan around the heatmap and +download it as a png. You might feel the colour bars is not adding much +to the plot. You can remove it by setting `hide_colorbar = TRUE,` in the +`heatmaply()` function. + +One of the NC replicates at time = 22 is very different from the other +replicates. The CN10 treatments cluster together at high time points. +CN10 samples are more similar to NC samples early on. Most of the VFAs +behave similarly with highest values later in the experiment for CN10 +but isohexanoate and hexanoate differ. The difference might be because +isohexanoate is especially low in the NC replicates at time = 1 and +hexanoate is especially high in the NC replicate 2 at time = 22 + +## 4. Calculate the flux - pending. + +Calculate the flux(change in VFA concentration over a period of time, +divided by weight or volume of material) of each VFA, by mM and by +weight. + +I've requested clarification: *for the flux measurements, do they need +graphs of the rate of change wrt time? And is the sludge volume going to +be a constant for all samples or something they measure and varies by +vial?* + +## 5. Graph and extract the reaction rate - pending + +Graph and extract the reaction rate assuming a first order +chemical/biological reaction and an exponential falloff rate + +I've requested clarification: *for the nonlinear least squares curve +fitting, I assume x is time but I'm not clear what the Y variable is - +concentration? or change in concentration? or rate of change of +concentration?* Pages made with R [@R-core], Quarto [@allaire2022], `knitr` [@knitr], `kableExtra` [@kableExtra]