Skip to content
This repository has been archived by the owner on Nov 23, 2023. It is now read-only.

Add functions to prepare tables for weekly statistics from Github. #5

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Imports:
base64enc,
lubridate,
ghql
RoxygenNote: 7.1.2
RoxygenNote: 7.2.1
Suggests:
covr,
testthat (>= 3.0.0),
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@ export(gh_score)
export(gh_user_event_get)
export(gh_user_get)
export(gh_user_repos_external_get)
export(gh_weekly_codelines_dt)
export(gh_weekly_codelines_get)
export(gh_weekly_commits_dt)
export(gh_weekly_commits_get)
importFrom(magrittr,"%>%")
122 changes: 122 additions & 0 deletions R/gh_weekly_codelines.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#' @export
#'
#' @title Weekly number of added and removed code lines from Github repos
#'
#' @description Builds a table with the number of lines of code added and
#'   removed per week, summed over all repositories passed in
#'   \code{full_names}. The raw statistics are fetched with
#'   \code{gh_weekly_codelines_get()}.
#'
#' @importFrom magrittr %>%
#'
#' @name gh_weekly_codelines_dt
#'
#' @param full_names A character vector containing names of repositories (format
#'   'org/repo').
#' @param year_for_stats A character year to filter statistics.
#' @param unit A unit to show values for added and removed lines. One of
#'   \code{"none"}, \code{"thous"}, \code{"mln"} or \code{"bln"}; the raw
#'   numbers are divided by 1, 1e3, 1e6 or 1e9 respectively.
#' @param ... Pass down options to \code{gh::gh()}
#'
#' @examples
#' \dontrun{
#' repos_openpharma <- gh_repos_get(org = "openpharma") %>%
#'   gh_repos_clean()
#'
#' gh_weekly_stats_dt <- gh_weekly_codelines_dt(
#'   full_names = repos_openpharma$full_name,
#'   year_for_stats = "2022",
#'   unit = "thous"
#' )
#' }
#'
#' @returns A tibble with one row per week and the columns \code{time},
#'   \code{added}, \code{removed}, \code{week_index}, \code{days_week},
#'   \code{unit}, \code{added_n} and \code{removed_n}. \code{added_n} and
#'   \code{removed_n} are the totals divided by the chosen unit and rounded to
#'   one decimal place; \code{removed_n} has its sign flipped relative to
#'   \code{removed}.

gh_weekly_codelines_dt <- function(
  full_names,
  year_for_stats,
  unit = c("none", "thous", "mln", "bln"),
  ...
) {

  # Validate `unit` before fetching anything, so that a mistyped unit fails
  # immediately instead of after one API request per repository.
  unit <- match.arg(unit)

  divisor <- switch(
    unit,
    "none" = 1,
    "thous" = 1e3,
    "mln" = 1e6,
    "bln" = 1e9
  )

  gh_weekly_stats <- gh_weekly_codelines_get(
    full_names,
    year_for_stats,
    ...
  )

  gh_weekly_stats %>%
    # One table per repository (one row per week) ...
    purrr::map(dplyr::bind_rows) %>%
    # ... stacked into a single table ...
    data.table::rbindlist() %>%
    # ... and summed over repositories for every week.
    dplyr::group_by(time) %>%
    dplyr::summarise(
      added = sum(added),
      removed = sum(removed)
    ) %>%
    dplyr::mutate(
      week_index = lubridate::week(time),
      days_week = paste0(time, " - ", time + lubridate::days(7)),
      unit = unit,
      added_n = round(added / divisor, 1),
      # Sign is flipped here; presumably the API reports deletions as
      # negative numbers -- confirm against the endpoint documentation.
      removed_n = round(-removed / divisor, 1)
    )

}

#' @export
#'
#' @title Get weekly number of lines of code from Github repos
#'
#' @name gh_weekly_codelines_get
#'
#' @param full_names A character vector containing names of repositories (format 'org/repo').
#' @param year_for_stats A character year to filter statistics.
#' @param ... Pass down options to \code{gh::gh()}
#'
#' @description This function is a handy wrapper to iterate requests for all
#'   given repositories to the Github endpoint
#'   \code{https://api.github.com/repos/OWNER/REPO/stats/code_frequency} and to
#'   keep only the weeks that fall into \code{year_for_stats}. For more
#'   information on the endpoint you can visit:\cr\cr
#'   \url{https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-the-weekly-commit-activity}
#'
#' @returns A list with one element per entry of \code{full_names}, named by
#'   the repository part of the full name (everything after the last
#'   \code{/}). Every element is a list of weeks, and every week is a list
#'   with: \itemize{
#'   \item{\code{time}: starting date of a week (POSIXct, UTC).}
#'   \item{\code{added}: lines of code added in a given week.}
#'   \item{\code{removed}: lines of code removed in a given week.}}

gh_weekly_codelines_get <- function(
  full_names,
  year_for_stats,
  ...
) {

  if (interactive()) {
    message(
      glue::glue("Pulling code_frequency stats for year {year_for_stats}")
    )
  }

  weekly_stats <- purrr::map(full_names, function(repo_full_name) {

    response <- gh::gh(
      "GET /repos/:repo_name/stats/code_frequency",
      repo_name = repo_full_name,
      ...
    )

    response %>%
      purrr::map(function(week) {
        # The first element is a Unix timestamp. Converting with an explicit
        # UTC time zone keeps the week date, and therefore the year filter
        # below, independent of the time zone of the machine running this.
        week[[1]] <- as.POSIXct(week[[1]], origin = "1970-01-01", tz = "UTC")
        names(week) <- c("time", "added", "removed")
        week
      }) %>%
      purrr::keep(~ lubridate::year(.x$time) == year_for_stats)
  })

  # Name each slot by the repository name without its owner prefix.
  names(weekly_stats) <- sub(".*/", "", full_names)

  weekly_stats

}
91 changes: 91 additions & 0 deletions R/gh_weekly_commits.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' @export
#'
#' @title Prepare table for Github weekly number of commits
#'
#' @description Fetches the weekly commit counts of every repository with
#'   \code{gh_weekly_commits_get()} and sums them, week by week, over all
#'   repositories. Repositories for which no counts were returned are ignored.
#'
#' @importFrom magrittr %>%
#'
#' @name gh_weekly_commits_dt
#'
#' @param full_names A character vector containing names of repositories (format
#'   'org/repo').
#' @param ... Pass down options to \code{gh::gh()}
#'
#' @examples
#' \dontrun{
#' repos_openpharma <- gh_repos_get(org = "openpharma") %>%
#'   gh_repos_clean()
#'
#' gh_weekly_stats <- gh_weekly_commits_dt(full_names = repos_openpharma$full_name)
#' }
#'
#' @returns A dataframe with one row per week and the columns
#'   \code{commits_number} (commits summed over all repositories) and
#'   \code{week_index} (position of the week in the returned series, starting
#'   at 1). The dataframe has zero rows when no repository returned any counts.

gh_weekly_commits_dt <- function(
  full_names,
  ...
) {

  gh_weekly_commits_stats <- gh_weekly_commits_get(full_names, ...)

  # Per repository: the "all" series flattened to a plain vector. Repositories
  # without any counts flatten to a zero-length value and are dropped.
  weekly_counts <- gh_weekly_commits_stats %>%
    purrr::map(function(repo_stats) unlist(repo_stats[["all"]])) %>%
    purrr::discard(function(counts) length(counts) == 0)

  if (length(weekly_counts) == 0) {
    return(
      data.frame(commits_number = integer(0), week_index = numeric(0))
    )
  }

  # All series must cover the same number of weeks, otherwise rbind() below
  # would silently recycle the shorter ones.
  if (length(unique(lengths(weekly_counts))) != 1) {
    stop(
      "Repositories returned weekly commit series of different lengths.",
      call. = FALSE
    )
  }

  # One row per repository, one column per week; summing each column gives
  # the total number of commits for that week.
  count_matrix <- do.call(rbind, unname(weekly_counts))
  commits_number <- unname(apply(count_matrix, 2, sum, na.rm = TRUE))

  data.frame(
    commits_number = commits_number,
    week_index = as.numeric(seq_along(commits_number))
  )

}

#' @export
#'
#' @title Get number of commits per week from a Github account
#'
#' @name gh_weekly_commits_get
#'
#' @param full_names A character vector containing names of repositories (format 'org/repo').
#' @param ... Pass down options to \code{gh::gh()}
#'
#' @description Collects the weekly number of commits for every repository
#'   listed in \code{full_names}.
#'
#' @details This function is a handy wrapper that sends one request per
#'   repository to the Github endpoint
#'   \code{https://api.github.com/repos/OWNER/REPO/stats/participation} For more
#'   information on the endpoint you can visit:\cr\cr
#'   \url{https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-the-weekly-commit-count}
#'
#' @returns A list with one element per entry of \code{full_names}, in the same
#'   order, each holding the response of the endpoint for that repository
#'   (number of commits per week).

gh_weekly_commits_get <- function(
  full_names,
  ...
) {

  # Request the participation stats of a single repository; `...` is taken
  # from the enclosing call and forwarded to gh::gh() unchanged.
  fetch_participation <- function(repo_full_name) {
    gh::gh(
      "GET /repos/:repo_name/stats/participation",
      repo_name = repo_full_name,
      ...
    )
  }

  purrr::map(full_names, fetch_participation)

}
28 changes: 28 additions & 0 deletions man/gh_weekly_code_lines_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions man/gh_weekly_codelines_dt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/gh_weekly_commits_dt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/gh_weekly_commits_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.