Skip to content

Commit

Permalink
bs_std function improvement
Browse files Browse the repository at this point in the history
Trying to remove duplicated rows and merge rows of df_std_BS where applicable
  • Loading branch information
gp1981 committed Dec 3, 2023
1 parent 7ddd8f7 commit 99fa931
Show file tree
Hide file tree
Showing 8 changed files with 17,503 additions and 489 deletions.
131 changes: 106 additions & 25 deletions code/Functions/data_retrieval.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,42 +78,123 @@ bs_std <- function(df_Facts) {
# Read the standardized_balancesheet.xlsx file
standardized_balancesheet <- read.xlsx(balancesheet_path, sheet = "Sheet1")

# Create a mapping between df_Facts_label and standardized_balancesheet_label
label_mapping <- df_Facts %>%
left_join(standardized_balancesheet, by = c("label" = "df_Facts_label")) %>%
select(label, standardized_balancesheet_label) %>%
# Rename the df_Facts_label column of standardized_balancesheet to label so it can be left-joined with df_Facts on "label"
standardized_balancesheet <- standardized_balancesheet %>%
rename(label = df_Facts_label)

# Merge df_Facts with standardized_balancesheet
df_std_BS <- df_Facts %>%
left_join(standardized_balancesheet, by = "label") %>%
select(standardized_balancesheet_label,everything(),-df_Fact_Description,)

# Filter out records not associated with standardized_balancesheet to create the mapping with df_Facts
df_std_BS_map <- df_std_BS %>%
filter(!is.na(standardized_balancesheet_label)) %>%
select(standardized_balancesheet_label,label,description) %>%
distinct()

# Select relevant columns from df_Facts
df_Facts_subset <- df_Facts %>%
select(label, end, fy, fp, form, val) %>%
left_join(label_mapping, by = "label")

# Merge with standardized_balancesheet to get the corresponding df_Facts_label
df_std_BS <- df_Facts_subset %>%
left_join(standardized_balancesheet, by = "standardized_balancesheet_label")

# Filter out records not associated with standardized_balancesheet
# Filter out records not associated with standardized_balancesheet to create the pivot
df_std_BS <- df_std_BS %>%
filter(!is.na(standardized_balancesheet_label)) %>%
select(-c(label,standardized_balancesheet_label, df_Fact_Description))
filter(!is.na(standardized_balancesheet_label)) %>%
select(standardized_balancesheet_label,end,val,fy,fp,form,filed,start)


# Pivot the data to the desired structure
df_std_BS <- df_std_BS %>%
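# For each standardized_balancesheet_label and end date, keep only the row(s) with the most recent filed date, then pivot to wide format with one column per standardized label, sorted by end date descending.
# NOTE(review): the grouping by label, fy and fp with logical columns (has_form_A and has_form) to drop rows without /A that have a corresponding row with /A is not implemented in this pipeline; it relies on max(filed) instead. Confirm this handles amended (/A) filings as intended.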
df_std_BS_test <- df_std_BS %>%
filter(!is.na(standardized_balancesheet_label)) %>%
mutate(end = ymd(end), filed = ymd(filed)) %>% # convert to date format
group_by(standardized_balancesheet_label, end) %>%
filter(filed == max(filed)) %>% # filter rows with the most recent filing date
ungroup() %>%
select(standardized_balancesheet_label, end, val, fy, fp, form, filed, start) %>%
pivot_wider(
names_from = df_Facts_label,
names_from = standardized_balancesheet_label,
values_from = val
) %>%
arrange(desc(end))

# Perform the calculation for "Other Current assets (to balance out the Total Current Assets)"
# <---- NEED TO CHECK STILL DUPLICATED ROW WITH 1 VARIABLE see 2020-12-31 NA ---->
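# Perform the calculation for additional records in the balance sheet
# NOTE(review): the column names in the expressions below are in single quotes, which R reads as string literals, so is.na('...') is always FALSE and 'a' - 'b' fails with "non-numeric argument to binary operator". Refer to the columns with backticks instead (e.g. `Total Assets` - `Total Current Assets`).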
df_std_BS <- df_std_BS %>%
mutate(
Other_Current_Assets = `Assets, Current` -
(`Cash and Cash Equivalents, at Carrying Value` +
`Marketable Securities, Current` +
`Accounts Receivable, after Allowance for Credit Loss, Current` +
`Inventory, Net`)
'Total Long Term Assets' = ifelse(
!is.na('Total Long Term Assets'),
'Total Assets' - 'Total Current Assets',
NA_real_
)
)

,
'Other Current Assets' = ifelse(
!is.na('Total Current Assets') &
!is.na('Cash & Cash Equivalent') &
!is.na('Marketable Securities, Current') &
!is.na('Total Accounts Receivable') &
!is.na('Total Inventories'),
'Total Current Assets' -
('Cash & Cash Equivalent' +
'Marketable Securities, Current' +
'Total Accounts Receivable' +
'Total Inventories'),
NA_real_
)

,
'Other Long Term Assets' = ifelse(
!is.na('Total Long Term Assets') &
!is.na('Marketable Securities, Non Current') &
!is.na('Property, Plant and Equipment') &
!is.na('Intangible Assets (excl. goodwill)') &
!is.na('Goodwill'),
'Total Long Term Assets' -
('Marketable Securities, Non Current' +
'Property, Plant and Equipment' +
'Intangible Assets (excl. goodwill)' +
'Goodwill'),
NA_real_
),
'Other Current Liabilities' = ifelse(
!is.na('Total Current Liabilities') &
!is.na('Accounts Payable') &
!is.na('Tax Payable') &
!is.na('Commercial papers') &
!is.na('Short-Term Debt') &
!is.na('Operating Lease, Liability, Current'),
'Total Current Liabilities' -
('Accounts Payable' + 'Tax Payable' +
'Commercial papers' +
'Short-Term Debt' +
'Operating Lease, Liability, Current'),
NA_real_
),
'Other Long Term Liabilities' = ifelse(
!is.na('Total Long Term Liabilities') &
!is.na('Long Term Debts') &
!is.na('Operating Lease, Liability, Non Current') &
!is.na('Finance Lease, Liability, Non Current'),
'Total Long Term Liabilities' -
('Long Term Debts' +
'Operating Lease, Liability, Non Current' +
'Finance Lease, Liability, Non Current'),
NA_real_
),
'Other Company Stockholders Equity' = ifelse(
!is.na('Total Company Stockholders Equity') &
!is.na('Common Stock & Additional paid-in capital') &
!is.na('Common Stock, Value, Issued') &
!is.na('Additional Paid in Capital') &
!is.na('Preferred Stock') &
!is.na('Retained Earnings') &
!is.na('Accumulated other comprehensive income (loss)'),
'Total Company Stockholders Equity' -
('Common Stock & Additional paid-in capital' +
'Common Stock, Value, Issued' +
'Additional Paid in Capital' +
'Preferred Stock' +
'Retained Earnings' +
'Accumulated other comprehensive income (loss)'),
NA_real_
)
)

# Reorder columns dynamically based on the order in standardized_balancesheet
Expand Down
10 changes: 3 additions & 7 deletions code/Functions/data_visualization.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,11 @@ FactsList_to_Dataframe <- function(company_Facts_us_gaap) {
unnest(cols = c(units)) %>%
unnest(cols = c(units))

# Convert the val column to numeric
df_Facts$val <- as.numeric(df_Facts$val)

df_Facts <- as.data.frame(df_Facts)

# Express values in millions by dividing val by 1e6
df_Facts <- df_Facts %>%
mutate(
val = val / 1e6,
formatted_val = scales::comma(val, accuracy = 0.1)
)
mutate(val = val / 1e6)

return(df_Facts)
}
Expand Down
Loading

0 comments on commit 99fa931

Please sign in to comment.