diff --git a/R/htr_calc_anomalies.R b/R/htr_calc_anomalies.R index ed0fc3a..565e906 100644 --- a/R/htr_calc_anomalies.R +++ b/R/htr_calc_anomalies.R @@ -1,11 +1,51 @@ #' Calculate anomalies relative to the baseline mean #' +#' This function calculates climate anomalies by subtracting baseline mean values +#' from projection data using CDO (Climate Data Operators). It processes multiple +#' climate model files in parallel, matching variables, frequencies, and models +#' between the projection data and baseline means. +#' +#' @details +#' The function uses the CDO `sub` operator to subtract baseline means from +#' projection files. It automatically matches files based on variable, frequency, +#' and model metadata extracted from CMIP6-formatted filenames. The process runs +#' in parallel using multiple CPU cores for efficient processing of large datasets. +#' +#' The workflow involves: +#' 1. Extracting metadata from baseline mean files +#' 2. Finding corresponding projection files for each variable-frequency-model combination +#' 3. Subtracting the appropriate baseline mean from each projection file using CDO +#' 4. Saving results with "_anomalies_" in the filename +#' #' @inheritParams htr_slice_period -#' @param mndir The directory where the baseline mean files are stored +#' @param mndir Character string. The directory where the baseline mean files are +#' stored. Files should follow CMIP6 naming conventions with variable, frequency, +#' and model information in the filename. +#' +#' @return +#' No return value. The function creates anomaly files in the specified output +#' directory with "_anomalies_" replacing "_merged_" in the original filenames. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files must follow CMIP6 naming conventions for proper metadata extraction +#' - Baseline mean files and projection files must have matching variable, frequency, and model names +#' - Uses parallel processing with (number of CPU cores - 2) workers +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO sub operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=297 #' #' @export #' #' @examples +#' \dontrun{ +#' htr_calc_anomalies( +#' indir = file.path(base_dir, "data", "tos", "raw"), +#' mndir = file.path(base_dir, "data", "tos", "mean"), +#' outdir = file.path(base_dir, "data", "tos", "anomalies") +#' ) +#' } htr_calc_anomalies <- function(indir, # input directory of the projections mndir, # directory of baseline mean outdir # where anomalies will be saved diff --git a/R/htr_calc_mean.R b/R/htr_calc_mean.R index f73a389..635ca3a 100644 --- a/R/htr_calc_mean.R +++ b/R/htr_calc_mean.R @@ -1,14 +1,59 @@ -#' Calculate mean of specified time period. +#' Calculate temporal mean of specified time period #' -#' Used to calculate baseline means. +#' This function calculates temporal means over a specified time period using CDO +#' (Climate Data Operators). It is primarily used to calculate baseline climatological +#' means from historical climate data, which can then be used for anomaly calculations. +#' +#' @details +#' The function uses the CDO `timmean` operator combined with `selyear` to calculate +#' temporal means over the specified year range. It processes files in parallel for +#' efficient computation of large climate datasets. The function automatically +#' generates output filenames with "_mean_" and the year range in the filename.
+#' +#' The CDO command executed is: +#' `cdo -L -timmean -selyear,year_start/year_end input_file output_file` +#' +#' Where: +#' - `-L` enables netCDF4 compression +#' - `timmean` calculates the temporal mean +#' - `selyear` selects the specified year range #' #' @author Dave Schoeman and Tin Buenafe #' #' @inheritParams htr_slice_period +#' @param scenario Character string. The CMIP6 scenario to process (e.g., "historical", +#' "ssp126", "ssp245"). Use "historical" for calculating baseline climatological means. +#' @param year_start Numeric. Starting year for calculating the temporal mean (inclusive). +#' @param year_end Numeric. Ending year for calculating the temporal mean (inclusive). +#' +#' @return +#' No return value. The function creates mean files in the specified output directory +#' with "_mean_YYYYMMDD-YYYYMMDD.nc" replacing "_merged_" in the original filenames, +#' where the dates represent the start and end of the averaging period. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files must be merged time series files (typically created by [`htr_merge_files()`]) +#' - Uses parallel processing with (number of CPU cores - 2) workers +#' - The `-L` flag enables netCDF4 compression for smaller output files +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO timmean operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=180 +#' CDO selyear operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=124 #' #' @export #' #' @examples +#' \dontrun{ +#' htr_calc_mean( +#' indir = file.path(base_dir, "data", "tos", "raw"), +#' outdir = file.path(base_dir, "data", "tos", "mean"), +#' scenario = "historical", +#' year_start = 1950, +#' year_end = 2014 +#' ) +#' } htr_calc_mean <- function(indir, # where inputs are outdir, # where outputs will be saved scenario, # historical or ssp (use historical for calculating baseline means) @@ -17,7 +62,7 @@ htr_calc_mean <- function(indir, # where inputs are ) { . <- NULL # Stop devtools::check() complaints about NSE - w <- parallel::detectCores() - 2 + w <- parallelly::availableCores(method = "system", omit = 2) ############## diff --git a/R/htr_change_freq.R b/R/htr_change_freq.R index 325e992..5d530b0 100644 --- a/R/htr_change_freq.R +++ b/R/htr_change_freq.R @@ -1,19 +1,57 @@ -#' Change frequency +#' Change temporal frequency of climate data +#' +#' This function changes the temporal frequency of climate data from daily to either +#' monthly or yearly averages using CDO (Climate Data Operators). It supports both +#' HPC array job processing and parallel processing for efficient computation. +#' +#' @details +#' The function uses CDO temporal aggregation operators to change frequency: +#' - For yearly frequency: Uses `cdo -yearmean` to calculate annual means +#' - For monthly frequency: Uses `cdo -monmean` to calculate monthly means +#' +#' The function can operate in different modes: +#' - **Array mode** (`hpc = "array"`): Processes a single specified file (useful for HPC job arrays) +#' - **Parallel mode** (`hpc = "parallel"` or `hpc = NA`): Processes all files in the input directory using parallel workers +#' +#' Output files are renamed to reflect the new temporal frequency, replacing "_merged_" +#' with either "_annual_" or "_monthly_" in the filename. #' #' @author Tin Buenafe #' #' @inheritParams htr_slice_period +#' @param freq Character string. The target temporal frequency. 
Valid options are: +#' - `"yearly"` or `"annual"`: Calculate annual means using CDO yearmean +#' - `"monthly"`: Calculate monthly means using CDO monmean +#' +#' @return +#' No return value. The function creates frequency-converted files in the specified +#' output directory with "_annual_" or "_monthly_" replacing "_merged_" in the +#' original filenames. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files should typically be daily frequency data for meaningful aggregation +#' - For HPC environments, set `hpc = "array"` and specify the `file` parameter +#' - Uses parallel processing when `hpc = NA` or `hpc = "parallel"` +#' - Worker count is automatically determined based on available CPU cores +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO yearmean operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=191 +#' CDO monmean operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=186 #' #' @export #' #' @examples +#' \dontrun{ #' htr_change_freq( -#' hpc = NA, -#' file = NA, -#' freq = "monthly", -#' indir = here("data", "proc", "sliced", variable), -#' outdir = here("data", "proc", "monthly", variable) +#' hpc = NA, +#' file = NA, +#' freq = "monthly", +#' indir = file.path(".", "data", "proc", "sliced", variable), +#' outdir = file.path(".", "data", "proc", "monthly", variable) #' ) +#' } htr_change_freq <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" file = NA, # hpc = "array", the input will be the file freq, # possible values are "yearly" or "monthly" diff --git a/R/htr_create_ensemble.R b/R/htr_create_ensemble.R index e6e489b..46b9d01 100644 --- a/R/htr_create_ensemble.R +++ b/R/htr_create_ensemble.R @@ -1,26 +1,67 @@ -#' Create an ensemble based on list of models +#' Create multi-model ensemble from climate model outputs #' +#' This function creates multi-model ensembles by combining outputs from multiple +#' climate models using CDO (Climate Data Operators). It can calculate either the +#' ensemble mean or median across the specified models, with support for seasonal +#' and depth-resolved data filtering. +#' +#' @details +#' The function uses CDO ensemble operators to combine multiple model outputs: +#' - **Ensemble mean**: Uses `cdo -ensmean` to calculate the arithmetic mean across models +#' - **Ensemble median**: Uses `cdo -ensmedian` to calculate the median across models +#' +#' The function automatically: +#' 1. Filters files based on variable, frequency, scenario, and optionally season/domain +#' 2. Selects only files from the specified models in `model_list` +#' 3. Creates ensemble statistics using the appropriate CDO operator +#' 4. Saves output with "ensemble" replacing the model name in the filename +#' +#' Output files are compressed using zip compression (`-z zip`) and use netCDF4 +#' format with the `-L` flag for efficient storage. #' #' @inheritParams htr_slice_period -#' @param model_list Character string of models to use for the ensemble -#' @param variable The variable to create the ensemble for -#' @param mean Use the mean (TRUE; default) or the median (FALSE) when creating the ensemble. -#' @param season If using seasonal frequency, input the season name to detect the files -#' @param domain If using depth-resolved models, input the domain name to detect the files +#' @param model_list Character vector. Names of climate models to include in the +#' ensemble. 
Model names must match those in the input filenames (e.g., +#' `c("ACCESS-ESM1-5", "CanESM5", "GFDL-ESM4")`). +#' @param variable Character string. The climate variable to create the ensemble for +#' (e.g., "tos" for sea surface temperature, "pr" for precipitation). Default is "tos". +#' @param mean Logical. If `TRUE` (default), calculates ensemble mean using CDO ensmean. +#' If `FALSE`, calculates ensemble median using CDO ensmedian. +#' @param season Character string. Optional season name to filter files (e.g., "DJF", +#' "JJA"). Only files containing this string will be included. Default is empty string (no filtering). +#' @param domain Character string. Optional domain name for depth-resolved models +#' (e.g., "surface", "0-100m"). Only files containing this string will be included. +#' Default is empty string (no filtering). +#' +#' @return +#' No return value. The function creates an ensemble file in the specified output +#' directory with "ensemble" replacing the model name in the original filename. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - All input files must be on the same spatial grid (use [`htr_regrid_esm()`] first if needed) +#' - All input files must have the same temporal resolution and time periods +#' - Model names in `model_list` must exactly match those in the input filenames +#' - Uses zip compression for efficient file storage +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO ensmean operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=78 +#' CDO ensmedian operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=79 #' #' @export #' #' @examples #' \dontrun{ #' htr_create_ensemble( -#' hpc = NA, -#' indir = file.path(base_dir, "data", "proc", "regridded", "yearly", "tos"), -#' outdir = file.path(base_dir, "data", "proc", "ensemble", "mean", "tos"), -#' model_list = c("ACCESS-ESM1-5", "CanESM5"), -#' variable = "tos", -#' freq = "Omon", -#' scenario = "ssp126", -#' mean = TRUE +#' hpc = NA, +#' indir = file.path(base_dir, "data", "proc", "regridded", "yearly", "tos"), +#' outdir = file.path(base_dir, "data", "proc", "ensemble", "mean", "tos"), +#' model_list = c("ACCESS-ESM1-5", "CanESM5"), +#' variable = "tos", +#' freq = "Omon", +#' scenario = "ssp126", +#' mean = TRUE #' ) #' } htr_create_ensemble <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" diff --git a/R/htr_download_ESM.R b/R/htr_download_ESM.R index 3b1f39e..e825ad5 100644 --- a/R/htr_download_ESM.R +++ b/R/htr_download_ESM.R @@ -1,16 +1,56 @@ -#' Download ESM data +#' Download Earth System Model (ESM) data using wget scripts +#' +#' This function downloads climate model data from remote repositories using wget +#' scripts. It processes multiple wget scripts in parallel to efficiently download +#' large climate datasets, typically from CMIP6 data nodes or similar repositories. +#' +#' @details +#' The function executes bash wget scripts that contain download commands for climate +#' data files. It changes the working directory to the output directory before running +#' each wget script to ensure files are downloaded to the correct location. +#' +#' The process involves: +#' 1. Finding all wget script files in the input directory +#' 2. For each script, changing to the output directory +#' 3. Executing the wget script with the `-s` flag (silent mode) +#' 4. 
Restoring the original working directory +#' +#' All wget scripts are processed in parallel using multiple workers for efficient +#' downloading of large datasets. #' #' @author Dave Schoeman and Tin Buenafe +#' #' @inheritParams htr_slice_period +#' @param indir Character string. Directory containing wget script files. These are +#' typically bash scripts with wget commands for downloading climate data from +#' remote repositories (e.g., ESGF data nodes). +#' @param outdir Character string. Directory where the downloaded NetCDF files will +#' be saved. The function will change to this directory before executing wget scripts. +#' +#' @return +#' No return value. The function downloads NetCDF files to the specified output +#' directory as defined by the wget scripts. +#' +#' @note +#' - Requires `wget` to be installed and accessible from the system PATH +#' - Wget scripts should be properly formatted bash scripts with appropriate download commands +#' - The function temporarily changes working directory during execution +#' - Uses parallel processing with (number of CPU cores - 2) workers +#' - Ensure sufficient disk space is available for downloaded climate data +#' - Network connectivity and access permissions to data repositories are required +#' +#' @references +#' ESGF Data Portal: https://esgf-node.llnl.gov/projects/esgf-llnl/ +#' CMIP6 Data Access: https://pcmdi.llnl.gov/CMIP6/ #' #' @export #' #' @examples #' \dontrun{ #' htr_download_ESM( -#' hpc = NA, -#' indir = file.path(base_dir, "data", "raw", "wget"), # input directory -#' outdir = file.path(base_dir, "data", "raw", "tos") # output directory +#' hpc = NA, +#' indir = file.path(base_dir, "data", "raw", "wget"), # input directory +#' outdir = file.path(base_dir, "data", "raw", "tos") # output directory #' ) #' } htr_download_ESM <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" diff --git a/R/htr_fix_calendar.R b/R/htr_fix_calendar.R index 277fc35..cf29786 100644 --- a/R/htr_fix_calendar.R +++ b/R/htr_fix_calendar.R @@ -1,19 +1,56 @@ -#' Fix calendars (leap years) +#' Fix calendar systems by standardizing to 365-day calendar #' +#' This function standardizes climate model data to use a consistent 365-day calendar +#' system by removing leap days (February 29th) and setting the calendar attribute. +#' This is essential for consistent temporal analysis across different climate models +#' that may use different calendar systems. +#' +#' @details +#' Climate models use various calendar systems (Gregorian, 365-day, 360-day, etc.), +#' which can cause issues when comparing or combining data from different models. +#' This function standardizes all data to a 365-day calendar using CDO operations. +#' +#' The function: +#' 1. Checks if files contain leap days by examining if the number of time steps is divisible by 365 +#' 2. For daily data with leap days: Uses `cdo -setcalendar,365_day -delete,month=2,day=29` to remove February 29th +#' 3. For data without leap days: Uses `cdo setcalendar,365_day` to set the calendar attribute +#' 4. Replaces original files with the calendar-corrected versions +#' +#' The process creates temporary files during processing to avoid data corruption. #' #' @author Dave Schoeman and Tin Buenafe #' #' @inheritParams htr_slice_period +#' @param indir Character string. Directory containing NetCDF files that need calendar +#' standardization. Files should be climate model outputs with time dimensions. +#' +#' @return +#' No return value. 
The function modifies files in-place, replacing original files +#' with calendar-standardized versions. Progress messages are printed to the console +#' indicating which files had leap days removed. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - **WARNING**: This function modifies files in-place. Ensure you have backups of original data +#' - Only processes daily frequency data for leap day removal (detected by "_day_" in frequency) +#' - Uses parallel processing when `hpc` is not set to "array" +#' - Creates temporary files during processing which are automatically cleaned up +#' - Prints informative messages about which files are being processed +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO setcalendar operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=142 +#' CDO delete operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=60 +#' CMIP6 calendar conventions: https://pcmdi.llnl.gov/CMIP6/Guide/dataUsers.html #' #' @export #' #' @examples #' \dontrun{ -#' #' htr_fix_calendar( #' hpc = NA, #' file = NA, -#' indir = file.path(base_dir, "data", "merged"), # input directory +#' indir = file.path(base_dir, "data", "merged") # input directory #' ) #' } htr_fix_calendar <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" diff --git a/R/htr_integrate_levels.R b/R/htr_integrate_levels.R index be635d6..962b10a 100644 --- a/R/htr_integrate_levels.R +++ b/R/htr_integrate_levels.R @@ -1,19 +1,76 @@ -#' Get the weighted vertical means +#' Calculate vertical means from depth-resolved climate data #' -#' hjfjhfjhf +#' This function calculates vertical (depth) means from 3D ocean climate model data +#' using CDO (Climate Data Operators). It can either integrate across all vertical +#' levels or select a specific depth range before calculating the vertical mean. +#' +#' @details +#' The function processes depth-resolved ocean data (e.g., temperature, salinity) +#' to create vertically-averaged fields. This is useful for analyzing ocean properties +#' at specific depth ranges or creating depth-integrated quantities. +#' +#' The CDO operations performed are: +#' - **With level selection**: `cdo select,levrange=min,max` followed by `cdo vertmean` +#' - **Without level selection**: `cdo vertmean` directly on the full depth range +#' +#' The function: +#' 1. Optionally selects a specific depth range using CDO select with levrange +#' 2. Calculates the vertical mean using CDO vertmean operator +#' 3. Adds an optional domain name suffix to output filenames +#' 4. Uses a temporary directory for intermediate processing when level selection is used #' #' @inheritParams htr_seasonal_frequency -#' @param select_levels If select levels = TRUE, minimum and maximum levels need to be provided -#' @param min_level Minimum level of depth domain -#' @param max_level Maximum level of depth domain -#' @param domain_name Depth domain name +#' @param tempdir Character string. Directory for temporary files during processing. +#' Used when `select_levels = TRUE` to store intermediate files after level selection. +#' @param select_levels Logical. If `TRUE`, selects a specific depth range defined by +#' `min_level` and `max_level` before calculating vertical means. If `FALSE` (default), +#' integrates across all available vertical levels. +#' @param min_level Numeric. 
Minimum depth level for integration (required when +#' `select_levels = TRUE`). Units depend on the model's vertical coordinate system. +#' @param max_level Numeric. Maximum depth level for integration (required when +#' `select_levels = TRUE`). Units depend on the model's vertical coordinate system. +#' @param domain_name Character string. Optional suffix to add to output filenames +#' to identify the depth domain (e.g., "surface", "0-100m"). Default is empty string. #' #' @return +#' No return value. The function creates vertically-integrated files in the specified +#' output directory. If `domain_name` is provided, it is added as a suffix to the +#' original filename before the file extension. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files must be 3D ocean data with vertical levels (depth or pressure coordinates) +#' - Temporary files are automatically cleaned up after processing +#' - Uses parallel processing when `hpc` is not set to "array" +#' - Level selection uses CDO's levrange which works with the model's native vertical coordinates +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO vertmean operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=203 +#' CDO select operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=123 +#' #' @export #' #' @examples #' \dontrun{ +#' # Integrate all levels +#' htr_integrate_levels( +#' indir = "path/to/3d/data", +#' tempdir = "path/to/temp", +#' outdir = "path/to/output", +#' select_levels = FALSE +#' ) #' +#' # Integrate specific depth range (e.g., upper 100m) +#' htr_integrate_levels( +#' indir = "path/to/3d/data", +#' tempdir = "path/to/temp", +#' outdir = "path/to/output", +#' select_levels = TRUE, +#' min_level = 0, +#' max_level = 100, +#' domain_name = "upper100m" +#' ) #' } htr_integrate_levels <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" file = NA, # hpc = "array", the input will be the file diff --git a/R/htr_merge_files.R b/R/htr_merge_files.R index 20c6d6b..6245e0a 100644 --- a/R/htr_merge_files.R +++ b/R/htr_merge_files.R @@ -1,22 +1,69 @@ -#' Merge Files +#' Merge climate model files into continuous time series #' -#' Merge files according to model, variable, frequency, scenario/experiment +#' This function merges multiple NetCDF files from the same climate model, variable, +#' frequency, scenario, and variant into single continuous time series files using +#' CDO (Climate Data Operators). This is essential for creating uninterrupted time +#' series from climate model outputs that are often split across multiple files. +#' +#' @details +#' Climate model data is typically provided as multiple files covering different +#' time periods. This function combines these files into continuous time series +#' using the CDO `mergetime` operator, which concatenates files along the time dimension. +#' +#' The function: +#' 1. Extracts metadata (variable, frequency, scenario, model, variant) from all files +#' 2. Groups files by their metadata combinations +#' 3. Filters files based on the specified year range to avoid out-of-scope data +#' 4. Merges files for each group using `cdo -L -selname,'variable' -mergetime` +#' 5. 
Creates output filenames with "_merged_" and the full time range +#' +#' The CDO command used is: +#' `cdo -L -selname,'variable' -mergetime input_files output_file` +#' +#' Where: +#' - `-L` enables netCDF4 compression +#' - `selname` ensures only the specified variable is retained +#' - `mergetime` concatenates files along the time dimension #' #' @author Dave Schoeman and Tin Buenafe #' #' @inheritParams htr_slice_period +#' @param year_start Numeric. Earliest year to include in the merged files. Files +#' ending before this year (for historical data) will be excluded. +#' @param year_end Numeric. Latest year to include in the merged files. Files +#' starting after this year (for projection data) will be excluded. +#' +#' @return +#' No return value. The function creates merged time series files in the specified +#' output directory with filenames following the pattern: +#' `variable_frequency_model_scenario_variant_merged_YYYYMMDD-YYYYMMDD.nc` +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files must follow CMIP6 naming conventions for proper metadata extraction +#' - Files are only merged if they don't already exist in the output directory +#' - Uses parallel processing with (number of CPU cores - 2) workers +#' - The `-L` flag enables netCDF4 compression for smaller output files +#' - Automatically handles different time ranges for historical vs. projection scenarios +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO mergetime operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=102 +#' CDO selname operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=126 #' #' @export #' #' @examples #' \dontrun{ +#' # Get a path to a temporary directory +#' temp_dir <- tempdir() #' #' htr_merge_files( #' hpc = NA, -#' indir = file.path(base_dir, "data", "raw", "tos"), # input directory -#' outdir = file.path(base_dir, "data", "proc", "merged", "tos"), # output directory -#' year_start = 1985, # earliest year across all the scenarios considered -#' year_end = 2100 # latest year across all the scenarios considered +#' indir = system.file("extdata", package = "hotrstuff"), # input directory +#' outdir = file.path(temp_dir, "merged"), # output directory +#' year_start = 1990, # earliest year across all the scenarios considered +#' year_end = 2014 # latest year across all the scenarios considered #' ) #' } htr_merge_files <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" diff --git a/R/htr_regrid_esm.R b/R/htr_regrid_esm.R index 5bfecc6..2539803 100644 --- a/R/htr_regrid_esm.R +++ b/R/htr_regrid_esm.R @@ -1,23 +1,69 @@ -#' Regrid the ESMs +#' Regrid Earth System Model outputs to a common spatial grid +#' +#' This function regrids climate model outputs from their native grids to a common +#' regular latitude-longitude grid using CDO (Climate Data Operators). This is +#' essential for comparing and combining data from different climate models that +#' use different spatial grids. +#' +#' @details +#' Different climate models use various spatial grids (regular lat-lon, curvilinear, +#' unstructured, etc.), making direct comparison difficult. This function standardizes +#' all data to a regular latitude-longitude grid using CDO interpolation methods. 
+#' +#' The function uses different CDO remapping operators based on the variable type: +#' - **Precipitation (`pr`)**: Uses conservative remapping (`remapcon`) to preserve +#' total precipitation amounts +#' - **Other variables**: Uses bilinear interpolation (`remapbil`) for smooth interpolation +#' +#' The process: +#' 1. Creates a blank raster template at the specified resolution +#' 2. For each input file, determines the appropriate remapping method +#' 3. Applies CDO remapping: `cdo -s -L -remapXXX,template input output` +#' 4. Updates filenames to include "Regridded" prefix +#' 5. Cleans up the temporary template file +#' +#' The `-s` flag suppresses CDO messages, and `-L` enables netCDF4 compression. #' #' @author David Schoeman and Tin Buenafe #' #' @inheritParams htr_slice_period -#' @param cell_res Resolution to which the ESM will be regridded -#' @param layer The layer to be regridded +#' @param cell_res Numeric. Spatial resolution in degrees for the target grid +#' (e.g., 0.25 for quarter-degree resolution, 1.0 for one-degree resolution). +#' Default is 0.25 degrees. +#' @param layer Character string. Description of the data layer being regridded +#' (e.g., "annual", "monthly", "anomalies"). This is used for filename generation +#' and progress reporting. +#' +#' @return +#' No return value. The function creates regridded files in the specified output +#' directory with "Regridded" added to the layer name in the filename +#' (e.g., "_annual_" becomes "_RegriddedAnnual_"). +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Creates a temporary grid template file that is automatically cleaned up +#' - Uses conservative remapping for precipitation to preserve mass conservation +#' - Uses bilinear interpolation for other variables (consider `remapdis` for some applications) +#' - Progress messages show the model and scenario being processed +#' - Uses parallel processing when `hpc` is not set to "array" +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO remapbil operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=115 +#' CDO remapcon operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=116 +#' Grid remapping methods: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#section.1.3.2 #' #' @export #' #' @examples -#' #' \dontrun{ #' htr_regrid_esm( -#' hpc = NA, -#' file = NA, -#' indir = file.path(base_dir, "data", "proc", "yearly", "tos"), -#' outdir = file.path(base_dir, "data", "proc", "regridded", "yearly", "tos"), -#' cell_res = 0.25, -#' layer = "annual" +#' hpc = NA, +#' file = NA, +#' indir = file.path(base_dir, "data", "proc", "yearly", "tos"), +#' outdir = file.path(base_dir, "data", "proc", "regridded", "yearly", "tos"), +#' cell_res = 0.25, +#' layer = "annual" #' ) #' } htr_regrid_esm <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" diff --git a/R/htr_seasonal_frequency.R b/R/htr_seasonal_frequency.R index 334d970..6d72f2b 100644 --- a/R/htr_seasonal_frequency.R +++ b/R/htr_seasonal_frequency.R @@ -1,22 +1,79 @@ -#' Change frequency to seasonal frequency +#' Convert climate data to seasonal frequency +#' +#' This function converts monthly or daily climate data to seasonal averages by +#' selecting specific months and calculating their yearly means using CDO (Climate +#' Data Operators). 
This is useful for analyzing seasonal climate patterns and +#' reducing temporal resolution for specific seasonal analyses. +#' +#' @details +#' The function creates seasonal climate data through a two-step CDO process: +#' 1. **Month selection**: Uses `cdo selmon` to select only the months that define the season +#' 2. **Seasonal averaging**: Uses `cdo yearmonmean` to calculate yearly means across the selected months +#' +#' The CDO operations performed are: +#' ``` +#' cdo selmon,month1,month2,month3 input_file temp_file +#' cdo yearmonmean temp_file output_file +#' ``` +#' +#' This approach allows for flexible seasonal definitions (e.g., DJF for winter, +#' JJA for summer, or custom seasons like monsoon periods). The function automatically +#' updates filenames to include "_seasonal_" and the season name. +#' +#' Temporary files are used during processing and are stored in the specified +#' temporary directory. #' #' @inheritParams htr_slice_period -#' @param tempdir Temporary directory where specific months are selected -#' @param months Define season (based in the numbered format of months) -#' @param months_name Define seasone name for the filename's suffix +#' @param tempdir Character string. Directory for temporary files during processing. +#' Used to store intermediate files after month selection before seasonal averaging. +#' @param months Character vector. Month numbers defining the season in two-digit +#' format (e.g., `c("12", "01", "02")` for DJF, `c("06", "07", "08")` for JJA). +#' Must be zero-padded (e.g., "01" not "1"). +#' @param months_name Character string. Descriptive name for the season that will +#' be added to output filenames (e.g., "DJF", "JJA", "monsoon", "dry-season"). +#' +#' @return +#' No return value. The function creates seasonal files in the specified output +#' directory with "_seasonal_" and the season name added to the original filenames +#' (e.g., "_merged_" becomes "_seasonal_YYYYMMDD-YYYYMMDD_seasonname.nc"). +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input data should be monthly or daily frequency for meaningful seasonal aggregation +#' - Month numbers must be zero-padded two-digit strings ("01", "02", etc.) 
+#' - Temporary files are created during processing but not automatically cleaned up +#' - Uses parallel processing when `hpc` is not set to "array" +#' - Ensure sufficient disk space in the temporary directory +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO selmon operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=125 +#' CDO yearmonmean operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=192 #' #' @export #' #' @examples #' \dontrun{ +#' # Create DJF (winter) seasonal data +#' htr_seasonal_frequency( +#' hpc = NA, +#' file = NA, +#' indir = here("data", "proc", "sliced", "omip", variable), +#' tempdir = here("data", "temporary"), +#' outdir = here("data", "proc", "seasonal", "omip", variable), +#' months = c("12", "01", "02"), # December, January, February +#' months_name = "DJF" # Winter season +#' ) +#' +#' # Create custom monsoon season #' htr_seasonal_frequency( -#' hpc = NA, -#' file = NA, -#' indir = here("data", "proc", "sliced", "omip", variable), -#' tempdir = here("data", "temporary"), -#' outdir = here("data", "proc", "seasonal", "omip", variable), -#' months = c("01", "02", "03"), # define season (in numbered format) -#' months_name = "jan-mar" # define season name +#' hpc = NA, +#' file = NA, +#' indir = here("data", "proc", "sliced", "omip", variable), +#' tempdir = here("data", "temporary"), +#' outdir = here("data", "proc", "seasonal", "omip", variable), +#' months = c("06", "07", "08", "09"), # June through September +#' months_name = "monsoon" #' ) #' } htr_seasonal_frequency <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" diff --git a/R/htr_shift_years.R b/R/htr_shift_years.R index fc12f33..d735bb9 100644 --- a/R/htr_shift_years.R +++ b/R/htr_shift_years.R @@ -1,18 +1,69 @@ -#' Shift years +#' Shift time coordinates in climate model data +#' +#' This function adjusts time coordinates in climate model files, particularly useful +#' for paleoclimate data where model years need to be shifted to correspond to actual +#' calendar years. It uses CDO (Climate Data Operators) to shift time coordinates +#' and updates filenames accordingly. +#' +#' @details +#' Some climate models, particularly paleoclimate simulations, use arbitrary year +#' numbering that doesn't correspond to actual calendar years. This function shifts +#' the time coordinates to align with real calendar years for proper temporal analysis. +#' +#' The function: +#' 1. Extracts metadata from input filenames to determine current year ranges +#' 2. Checks if years are less than 1200 (indicating they need adjustment) +#' 3. For files needing adjustment: +#' - Calculates new year ranges by adding the adjustment value +#' - Uses `cdo shifttime,Nyears` to shift the time coordinates +#' - Updates filenames with the new year ranges +#' 4. For files not needing adjustment: Simply copies them to the output directory +#' +#' The CDO command used for shifting is: +#' `cdo shifttime,adjust_value years input_file output_file` #' #' @author Tin Buenafe #' -#' @inheritParams htr_slice_period -#' @param adjust_value Years that will be used to adjust the time (could be positive or negative) +#' @param indir Character string. Directory containing input NetCDF files with +#' time coordinates that may need shifting. +#' @param outdir Character string. Directory where time-adjusted files will be saved. +#' @param adjust_value Numeric. Number of years to add to the time coordinates. 
+#' Can be positive or negative. For example, use 1653 to shift model years +#' 347-1006 to calendar years 2000-2659. +#' +#' @return +#' No return value. The function creates time-adjusted files in the specified output +#' directory. Files with years < 1200 are shifted and renamed with new year ranges, +#' while others are copied unchanged. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files must follow CMIP6 naming conventions for proper metadata extraction +#' - The threshold of 1200 years is used to identify files needing time adjustment +#' - Uses parallel processing with (number of CPU cores - 2) workers +#' - Filenames are automatically updated to reflect the new time ranges +#' - Progress messages show which files are being processed +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO shifttime operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=143 #' #' @export #' #' @examples #' \dontrun{ +#' # Shift paleoclimate model years to modern calendar years +#' htr_shift_years( +#' indir = file.path(base_dir, "data", "proc", "regridded", "yearly", "tos"), +#' outdir = file.path(base_dir, "data", "proc", "shifted", "yearly", "tos"), +#' adjust_value = 1653 # Shift model years 347-1006 to 2000-2659 +#' ) +#' +#' # Shift backwards (negative adjustment) #' htr_shift_years( -#' indir = file.path(base_dir, "data", "proc", "regridded", "yearly", "tos"), -#' outdir = file.path(base_dir, "data", "proc", "ensemble", "mean", "tos"), -#' adjust_value = 1653 +#' indir = file.path(base_dir, "data", "proc", "regridded", "yearly", "tos"), +#' outdir = file.path(base_dir, "data", "proc", "shifted", "yearly", "tos"), +#' adjust_value = -50 # Shift 50 years backwards #' ) #' } htr_shift_years <- function(indir, diff --git a/R/htr_show_levels.R b/R/htr_show_levels.R index cdd9874..c0167ec 100644 --- a/R/htr_show_levels.R +++ b/R/htr_show_levels.R @@ -1,15 +1,57 @@ -#' Print depth-resolved levels +#' Display vertical levels in depth-resolved climate data #' -#' @inheritParams htr_slice_period +#' This function examines 3D ocean climate model data and displays the vertical +#' levels (depths or pressure levels) available in each file using CDO (Climate +#' Data Operators). This is useful for understanding the vertical structure of +#' ocean models before performing depth-related operations. +#' +#' @details +#' Ocean climate models use various vertical coordinate systems (depth in meters, +#' pressure levels, sigma coordinates, etc.). This function uses the CDO `showlevel` +#' operator to display the vertical levels present in each file, helping users +#' understand the vertical resolution and coordinate system. +#' +#' The function: +#' 1. Processes all NetCDF files in the input directory +#' 2. For each file, executes `cdo showlevel filename` to extract level information +#' 3. Prints the filename and returns the level information +#' 4. Collects all level information into a character vector +#' +#' This information is essential for: +#' - Understanding the vertical structure of ocean models +#' - Planning depth integration operations with [`htr_integrate_levels()`] +#' - Selecting appropriate level ranges for analysis +#' +#' @param indir Character string. Directory containing 3D NetCDF files with +#' vertical levels (typically ocean model outputs with depth or pressure coordinates). 
#' #' @return +#' Character vector containing the vertical level information for each file. +#' Each element corresponds to one input file and contains the level values +#' as returned by CDO showlevel. +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files must be 3D data with vertical coordinate dimensions +#' - Prints filenames to console for progress tracking +#' - Level values and units depend on the model's vertical coordinate system +#' - Does not use parallel processing (processes files sequentially) +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO showlevel operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=144 +#' #' @export #' #' @examples #' \dontrun{ -#' htr_show_levels( -#' indir = file.path(base_dir, "data", "proc", "sliced", "omip", variable) +#' # Display levels in ocean model data +#' levels_info <- htr_show_levels( +#' indir = file.path(base_dir, "data", "proc", "sliced", "omip", "thetao") #' ) +#' +#' # View the level information +#' print(levels_info) #' } htr_show_levels <- function(indir) { diff --git a/R/htr_slice_period.R b/R/htr_slice_period.R index acbbd92..ed7f7a6 100644 --- a/R/htr_slice_period.R +++ b/R/htr_slice_period.R @@ -1,30 +1,92 @@ -#' Slice Period +#' Extract specific time periods from climate model data +#' +#' This function extracts specific time periods from merged climate model files +#' using CDO (Climate Data Operators). It is essential for focusing analysis on +#' particular time ranges of interest, such as future projection periods or +#' specific historical periods. +#' +#' @details +#' Climate model data often spans long time periods, but analysis typically focuses +#' on specific time ranges. This function uses the CDO `selyear` operator to extract +#' the specified year range from merged time series files. +#' +#' The function: +#' 1. Filters files by frequency and scenario +#' 2. For each matching file, checks if it contains data outside the target period +#' 3. If trimming is needed, uses `cdo selyear,year_start/year_end` to extract the period +#' 4. Updates filenames to reflect the new time range +#' 5. Optionally removes original files if `overwrite = TRUE` +#' +#' The CDO command used is: +#' `cdo selyear,year_start/year_end input_file output_file` +#' +#' Files are only processed if they contain data outside the specified time range, +#' making the function efficient for large datasets. #' #' @author Dave Schoeman and Tin Buenafe #' -#' @param hpc Indicates whether the user is working in a HPC (High Performance Computing) facility -#' @param file For when using the "array" option in the HPC, the file name needs to be specified -#' @param indir Directory where input files are located -#' @param outdir Directory where output files will be saved -#' @param freq The temporal frequency to be used in the analysis -#' @param scenario The CMIP scenario to be used in the analysis. -#' @param year_start Starting year -#' @param year_end Ending year -#' @param overwrite Should the output files be overwritten if they already exist (defaults to TRUE) +#' @param hpc Character string or NA. Indicates High Performance Computing mode: +#' - `NA`: Standard processing mode +#' - `"array"`: HPC array job mode (requires `file` parameter) +#' - `"parallel"`: HPC parallel mode +#' @param file Character string or NA. Specific file to process when `hpc = "array"`. +#' Not used in other modes. 
+#' @param indir Character string. Directory containing merged NetCDF files to be +#' time-sliced. Files should be continuous time series created by [`htr_merge_files()`]. +#' @param outdir Character string. Directory where time-sliced files will be saved. +#' @param freq Character string. CMIP6 frequency identifier to filter files +#' (e.g., "Omon" for ocean monthly, "day" for daily, "Amon" for atmosphere monthly). +#' @param scenario Character string. CMIP6 scenario identifier to filter files +#' (e.g., "historical", "ssp126", "ssp245", "ssp585"). Use partial strings to +#' match multiple scenarios (e.g., "ssp" for all SSP scenarios). +#' @param year_start Numeric. Starting year for the time slice (inclusive). +#' @param year_end Numeric. Ending year for the time slice (inclusive). +#' @param overwrite Logical. If `TRUE` (default), removes original files after +#' successful time slicing. If `FALSE`, keeps original files. +#' +#' @return +#' No return value. The function creates time-sliced files in the specified output +#' directory with updated filenames reflecting the new time range +#' (e.g., "_merged_" becomes "_YYYYMMDD-YYYYMMDD.nc"). +#' +#' @note +#' - Requires CDO (Climate Data Operators) to be installed and accessible from the system PATH +#' - Input files must follow CMIP6 naming conventions for proper metadata extraction +#' - Files are only processed if they contain data outside the specified time range +#' - Uses parallel processing when `hpc` is not set to "array" +#' - **WARNING**: Setting `overwrite = TRUE` will delete original files +#' - Progress messages show which model and scenario combinations are being processed +#' +#' @references +#' CDO User Guide: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf +#' CDO selyear operator: https://code.mpimet.mpg.de/projects/cdo/embedded/cdo.pdf#page=124 #' #' @export #' #' @examples #' \dontrun{ +#' # Extract 21st century projection period +#' htr_slice_period( +#' hpc = NA, +#' indir = file.path(base_dir, "data", "proc", "merged", "tos"), +#' outdir = file.path(base_dir, "data", "proc", "sliced", "tos"), +#' freq = "Omon", # ocean monthly +#' scenario = "ssp", +#' year_start = 2020, +#' year_end = 2100, +#' overwrite = FALSE +#' ) +#' +#' # Extract historical baseline period #' htr_slice_period( -#' hpc = NA, -#' indir = file.path(base_dir, "data", "proc", "merged", "tos"), # input directory -#' outdir = file.path(base_dir, "data", "proc", "sliced", "tos"), # output directory -#' freq = "Omon", # ocean, daily -#' scenario = "ssp", -#' year_start = 2020, -#' year_end = 2100, -#' overwrite = FALSE +#' hpc = NA, +#' indir = file.path(base_dir, "data", "proc", "merged", "tos"), +#' outdir = file.path(base_dir, "data", "proc", "sliced", "tos"), +#' freq = "Omon", +#' scenario = "historical", +#' year_start = 1995, +#' year_end = 2014, +#' overwrite = TRUE #' ) #' } htr_slice_period <- function(hpc = NA, # if ran in the HPC, possible values are "array", "parallel" diff --git a/R/utils.R b/R/utils.R index 7549fe5..74aa51d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,9 +1,41 @@ -#' Function to convert a raster mask to a netCDF +#' Convert a raster mask to netCDF4 format for CDO compatibility #' -#' Based on http://geog.uoregon.edu/bartlein/courses/geog490/week04-netCDF.html#create-and-write-a-netcdf-file +#' This internal function converts a terra raster object to a netCDF4 file that +#' is compatible with CDO (Climate Data Operators). It's primarily used for +#' creating grid template files for regridding operations. 
+#' +#' @details +#' The function creates a properly formatted netCDF4 file with: +#' - Longitude and latitude dimensions with appropriate attributes +#' - Time dimension (set to 1850-01-01 for compatibility) +#' - 365-day calendar attribute for consistency with climate data +#' - Proper axis attributes (X, Y, T) for CDO recognition +#' - netCDF4 "classic model" format for CDO compatibility +#' +#' The process involves creating a temporary netCDF3 file, converting it to +#' netCDF4 format using `nccopy`, and then using CDO to invert latitude +#' ordering for proper orientation. #' #' @author David Schoeman #' +#' @param x A terra SpatRaster object to be converted to netCDF +#' @param pth Character string. Directory path where the netCDF file will be saved +#' @param ncName Character string. Name of the output netCDF file +#' @param dname Character string. Variable name in the netCDF file +#' @param dlname Character string. Long name for the variable +#' +#' @return +#' Character string. Full path to the created netCDF file. +#' +#' @note +#' - Requires `nccopy` utility (part of netCDF tools) to be available +#' - Requires CDO for latitude inversion (`cdo -invertlat`) +#' - Creates temporary files that are automatically cleaned up +#' - Uses 365-day calendar for consistency with climate model data +#' +#' @references +#' Based on: http://geog.uoregon.edu/bartlein/courses/geog490/week04-netCDF.html#create-and-write-a-netcdf-file +#' #' @noRd htr_mask2netCDF4 <- function(x, pth = paste0(getwd(), "/", "Data"), @@ -60,26 +92,61 @@ htr_mask2netCDF4 <- function(x, -#' Make a folder +#' Create directory if it doesn't exist +#' +#' This utility function creates a directory (and any necessary parent directories) +#' if it doesn't already exist. It's used throughout the hotrstuff package to +#' ensure output directories are available before processing. +#' +#' @param folder Character string. Path to the directory to be created. Can be +#' relative or absolute path. Parent directories will be created recursively +#' if they don't exist. #' -#' @param folder Character string of folder to be created +#' @return +#' No return value. The function creates the directory structure as needed. +#' +#' @note +#' - Uses `dir.create()` with `recursive = TRUE` to create parent directories +#' - Checks if directory already exists before attempting creation +#' - No error is thrown if directory already exists #' #' @export #' #' @examples #' \dontrun{ #' htr_make_folder("~/Data/output") +#' htr_make_folder("./results/processed/regridded") #' } htr_make_folder <- function(folder) { if (!isTRUE(file.info(folder)$isdir)) dir.create(folder, recursive = TRUE) } -#' Create a blank raster +#' Create a blank raster template for regridding operations +#' +#' This internal function creates a global regular latitude-longitude raster +#' template at the specified resolution and converts it to netCDF4 format for +#' use as a regridding target in CDO operations. #' +#' @details +#' The function creates a global raster covering -180 to 180 degrees longitude +#' and -90 to 90 degrees latitude at the specified resolution. All cells are +#' set to value 1, and the raster is converted to netCDF4 format using +#' [`htr_mask2netCDF4()`] for compatibility with CDO regridding operations. #' #' @author David Schoeman and Tin Buenafe #' +#' @param out_dir Character string. Directory where the template file will be created +#' @param cell_res Numeric. 
Spatial resolution in degrees (e.g., 0.25 for quarter-degree) +#' +#' @return +#' Character string. Full path to the created netCDF template file. +#' +#' @note +#' - Creates a file named "base_rast.nc" in the output directory +#' - Template file should be deleted after regridding operations +#' - Uses terra::rast() to create the initial raster template +#' #' @noRd htr_make_blankRaster <- function(out_dir, cell_res # resolution of the cell ) { @@ -100,11 +167,37 @@ htr_make_blankRaster <- function(out_dir, cell_res # resolution of the cell -#' Get data from range of years +#' Extract data from a specific year range using CDO +#' +#' This internal function extracts data from a specified year range using CDO's +#' `selyear` operator. It's used by [`htr_slice_period()`] to perform the actual +#' time slicing operations and handles filename generation for the output. #' +#' @details +#' The function checks if the input file's time range extends beyond the requested +#' years and uses CDO to extract only the specified period. It automatically +#' generates appropriate output filenames with the new time range. +#' +#' The CDO command used is: +#' `cdo selyear,year_start/year_end input_file output_file` #' #' @author David Schoeman and Tin Buenafe #' +#' @param nc_file Character string. Name of the input NetCDF file +#' @param yr1 Numeric. Starting year for extraction +#' @param yr2 Numeric. Ending year for extraction +#' @param infold Character string. Input directory path +#' @param outfold Character string. Output directory path +#' @param overwrite Logical. Whether to overwrite existing files +#' +#' @return +#' No return value. Creates time-sliced file in the output directory. +#' +#' @note +#' - Only processes files if their time range extends beyond the requested period +#' - Automatically generates output filenames with new time ranges +#' - Uses CMIP6 filename parsing for metadata extraction +#' #' @noRd htr_get_Years <- function(nc_file, yr1, yr2, infold, outfold, overwrite) { . <- NULL # Stop devtools::check() complaints about NSE @@ -134,10 +227,34 @@ htr_get_Years <- function(nc_file, yr1, yr2, infold, outfold, overwrite) { -#' Get combinations of variables, frequency, experiments/scenarios, models, and variants from the netCDF files +#' Extract metadata combinations from CMIP6 filenames +#' +#' This internal function extracts unique combinations of specified metadata +#' elements (variable, frequency, scenario, model, variant) from CMIP6-formatted +#' filenames in a directory. It's used to organize parallel processing tasks +#' by grouping files with common characteristics. +#' +#' @details +#' The function processes all files in a directory, extracts CMIP6 metadata +#' using [`htr_get_CMIP6_bits()`], and returns unique combinations of the +#' requested metadata elements. This is essential for organizing batch +#' processing operations where files need to be grouped by their characteristics. #' #' @author David Schoeman and Tin Buenafe #' +#' @param x Character string. Directory path containing CMIP6 files +#' @param string Character vector. Metadata elements to extract (e.g., +#' c("Variable", "Frequency", "Model", "Scenario", "Variant")) +#' +#' @return +#' List of character vectors containing unique combinations of the requested +#' metadata elements, suitable for use with parallel processing functions. 
+#' +#' @note +#' - Requires files to follow CMIP6 naming conventions +#' - Returns distinct combinations only (no duplicates) +#' - Output format is compatible with purrr::pwalk() for parallel processing +#' #' @noRd htr_get_meta <- function(x, string # refers to the aspects extracted per climate model @@ -157,10 +274,51 @@ htr_get_meta <- function(x, -#' Extract CMIP6 bits from the name of the file +#' Parse CMIP6 filename components and metadata +#' +#' This internal function parses CMIP6-formatted filenames to extract metadata +#' components including variable, frequency, model, scenario, variant, grid, +#' and time range information. It's essential for organizing and processing +#' climate model data based on their characteristics. +#' +#' @details +#' CMIP6 files follow a standardized naming convention: +#' `variable_frequency_model_scenario_variant_grid_timerange.nc` +#' +#' The function: +#' 1. Splits the filename by underscores to extract components +#' 2. Parses the time range component (7th element) to extract start/end dates +#' 3. Handles different frequency formats (monthly, yearly) by adjusting date formats +#' 4. Converts date strings to Date objects for proper temporal handling +#' +#' Special handling for different frequencies: +#' - Monthly data (`_.mon_`): Adds day 01 and 31 to start/end dates +#' - Yearly data (`_.year_`): Adds full date range (0101-1231) to years #' #' @author David Schoeman and Tin Buenafe #' +#' @param file_name Character string. CMIP6-formatted filename to parse +#' +#' @return +#' Named list containing: +#' - `Variable`: Climate variable name (e.g., "tos", "pr", "tas") +#' - `Frequency`: Temporal frequency (e.g., "Omon", "day", "Amon") +#' - `Model`: Climate model name (e.g., "ACCESS-ESM1-5", "CanESM5") +#' - `Scenario`: Experiment/scenario (e.g., "historical", "ssp126") +#' - `Variant`: Variant label (e.g., "r1i1p1f1") +#' - `Grid`: Grid label (e.g., "gn", "gr") +#' - `Year_start`: Start date as Date object +#' - `Year_end`: End date as Date object +#' +#' @note +#' - Assumes standard CMIP6 filename format with 7 underscore-separated components +#' - Handles different temporal frequency formats automatically +#' - Returns Date objects for proper temporal operations +#' - Used throughout the package for file organization and metadata extraction +#' +#' @references +#' CMIP6 Data Reference Syntax: https://pcmdi.llnl.gov/CMIP6/Guide/dataUsers.html +#' #' @noRd htr_get_CMIP6_bits <- function(file_name) { bits <- stringr::str_split(basename(file_name), "_") %>% diff --git a/_pkgdown.yml b/_pkgdown.yml index d71acfb..1031eae 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,40 @@ url: ~ template: bootstrap: 5 +development: + destination: + version_label: info + version_tooltip: "The package is in the early stages of development. Use with caution." 
+reference: +- title: Data Acquisition +- contents: + - htr_download_ESM + - htr_shift_years + +- title: Temporal Data Preparation +- contents: + - htr_merge_files + - htr_slice_period + - htr_fix_calendar + +- title: Temporal Processing +- contents: + - htr_change_freq + - htr_seasonal_frequency + +- title: Spatial Processing +- contents: + - htr_regrid_esm + - htr_integrate_levels + - htr_show_levels + +- title: Statistical Analysis +- contents: + - htr_calc_mean + - htr_calc_anomalies + - htr_create_ensemble + +- title: Utility Functions +- contents: + - htr_make_folder diff --git a/docs/404.html b/docs/404.html index c79a83d..38043a7 100644 --- a/docs/404.html +++ b/docs/404.html @@ -6,16 +6,15 @@ Page not found (404) • hotrstuff
-Extract CMIP6 bits from the name of the file
-Usage
-htr_get_CMIP6_bits(file_name)
-Author
-David Schoeman and Tin Buenafe
diff --git a/docs/reference/htr_get_Years.html b/docs/reference/htr_get_Years.html deleted file mode 100644 index fe34665..0000000 --- a/docs/reference/htr_get_Years.html +++ /dev/null @@ -1,81 +0,0 @@
-Get data from range of years — htr_get_Years • hotrstuff
-Usage
-htr_get_Years(nc_file, yr1, yr2, infold, outfold, overwrite)
-Author
-David Schoeman and Tin Buenafe
diff --git a/docs/reference/htr_get_meta.html b/docs/reference/htr_get_meta.html deleted file mode 100644 index 0916b66..0000000 --- a/docs/reference/htr_get_meta.html +++ /dev/null @@ -1,81 +0,0 @@
-Get combinations of variables, frequency, experiments/scenarios, models, and variants from the netCDF files — htr_get_meta • hotrstuff
-Usage
-htr_get_meta(x, string)
-Author
-David Schoeman and Tin Buenafe
diff --git a/docs/reference/htr_integrate_levels.html b/docs/reference/htr_integrate_levels.html index 9fc6a0e..75b34df 100644 --- a/docs/reference/htr_integrate_levels.html +++ b/docs/reference/htr_integrate_levels.html @@ -1,5 +1,9 @@
-Get the weighted vertical means — htr_integrate_levels • hotrstuff
+Calculate vertical means from depth-resolved climate data — htr_integrate_levels • hotrstuff
-Create a blank raster
-Usage
-htr_make_blankRaster(out_dir, cell_res)
-Author
-David Schoeman and Tin Buenafe
diff --git a/docs/reference/htr_make_folder.html b/docs/reference/htr_make_folder.html index bd255eb..b5cf92d 100644 --- a/docs/reference/htr_make_folder.html +++ b/docs/reference/htr_make_folder.html @@ -1,5 +1,9 @@
-Make a folder — htr_make_folder • hotrstuff
+Create directory if it doesn't exist — htr_make_folder • hotrstuff
-Based on http://geog.uoregon.edu/bartlein/courses/geog490/week04-netCDF.html#create-and-write-a-netcdf-file
-Usage
-htr_mask2netCDF4(
-  x,
-  pth = paste0(getwd(), "/", "Data"),
-  ncName = "mask.nc",
-  dname = "tos",
-  dlname = "tos"
-)
-Author
-David Schoeman
diff --git a/docs/reference/htr_merge_files.html b/docs/reference/htr_merge_files.html index c8ceeb8..10ed28a 100644 --- a/docs/reference/htr_merge_files.html +++ b/docs/reference/htr_merge_files.html @@ -1,5 +1,11 @@
-Merge Files — htr_merge_files • hotrstuff
+Merge climate model files into continuous time series — htr_merge_files • hotrstuff