Skip to content

Commit

Permalink
Add a parser for Schwab Equity Award data (#277)
Browse files Browse the repository at this point in the history
* Added parser for Charles Schwab Equity Award transactions in JSON format.

* Add pandas dependency.

* Fixed pre-submit tests.

* Update poetry's lock.

* Updated schwab API URL.

* Increase decimal count to avoid approximation errors due to stock-split.

* Add Schwab Equity Awards to the README.md.

* Restore upstream poetry.lock to resolve conflict.

* Update poetry.lock to resolve merge conflict.

* Fix python-typing check error.
  • Loading branch information
thibwk committed Oct 11, 2022
1 parent 974bf31 commit 955d0d4
Show file tree
Hide file tree
Showing 11 changed files with 609 additions and 12 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ You will need several input files:
- Exported transaction history from Schwab in CSV format since the beginning.
Or at least since you first acquired the shares, which you were holding during the tax year.
[See example](https://github.com/KapJI/capital-gains-calculator/blob/main/tests/test_data/schwab_transactions.csv).
- Exported transaction history from Schwab Equity Awards (e.g. for Alphabet/Google employees) since the beginning. These are to be downloaded in JSON format. Instructions are available at the top of the [parser file](../blob/cgt_calc/parsers/schwab_equity_award_json.py).
- Exported transaction history from Trading 212.
You can use several files here since Trading 212 limit the statements to 1 year periods.
[See example](https://github.com/KapJI/capital-gains-calculator/tree/main/tests/test_data/trading212).
Expand Down
7 changes: 7 additions & 0 deletions cgt_calc/args_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ def create_parser() -> argparse.ArgumentParser:
nargs="?",
help="file containing schwab award data for stock prices",
)
parser.add_argument(
"--schwab_equity_award_json",
type=str,
default=None,
nargs="?",
help="file containing schwab equity award transactions data in JSON format",
)
parser.add_argument(
"--trading212",
type=str,
Expand Down
7 changes: 6 additions & 1 deletion cgt_calc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,12 @@ def main() -> int:

# Read data from input files
broker_transactions = read_broker_transactions(
args.schwab, args.schwab_award, args.trading212, args.mssb, args.sharesight
args.schwab,
args.schwab_award,
args.schwab_equity_award_json,
args.trading212,
args.mssb,
args.sharesight,
)
converter = CurrencyConverter(args.exchange_rates_file)
initial_prices = InitialPrices(read_initial_prices(args.initial_prices))
Expand Down
9 changes: 9 additions & 0 deletions cgt_calc/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from .mssb import read_mssb_transactions
from .schwab import read_schwab_transactions
from .schwab_equity_award_json import read_schwab_equity_award_json_transactions
from .sharesight import read_sharesight_transactions
from .trading212 import read_trading212_transactions

Expand Down Expand Up @@ -44,6 +45,7 @@ def __str__(self) -> str:
def read_broker_transactions(
schwab_transactions_file: str | None,
schwab_awards_transactions_file: str | None,
schwab_equity_award_json_transactions_file: str | None,
trading212_transactions_folder: str | None,
mssb_transactions_folder: str | None,
sharesight_transactions_folder: str | None,
Expand All @@ -57,6 +59,13 @@ def read_broker_transactions(
else:
print("WARNING: No schwab file provided")

if schwab_equity_award_json_transactions_file is not None:
transactions += read_schwab_equity_award_json_transactions(
schwab_equity_award_json_transactions_file
)
else:
print("WARNING: No schwab Equity Award JSON file provided")

if trading212_transactions_folder is not None:
transactions += read_trading212_transactions(trading212_transactions_folder)
else:
Expand Down
252 changes: 252 additions & 0 deletions cgt_calc/parsers/schwab_equity_award_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
"""Charles Schwab Equity Award JSON export parser.
To get the data from Schwab:
1. Open https://client.schwab.com/Apps/accounts/transactionhistory/#/
2. Make sure Equity Award Center is selected
3. Select date range ALL and click SEARCH
4. In chrome devtools, look for an API call to
https://ausgateway.schwab.com/api/is.TransactionHistoryWeb/TransactionHistoryInterface/TransactionHistory/equity-award-center/transactions
5. Copy the response JSON into a file and pass its path with the
--schwab_equity_award_json command line argument
"""
from __future__ import annotations

import datetime
from decimal import Decimal
import json
from pathlib import Path
from typing import Any

from pandas.tseries.holiday import USFederalHolidayCalendar # type: ignore
from pandas.tseries.offsets import CustomBusinessDay # type: ignore

from cgt_calc.exceptions import ParsingError
from cgt_calc.model import ActionType, BrokerTransaction

# Delay between a (sale) trade, and when it is settled.
SETTLEMENT_DELAY = 2 * CustomBusinessDay(calendar=USFederalHolidayCalendar())

JsonRowType = Any # type: ignore


def action_from_str(label: str) -> ActionType:
    """Translate a Schwab action label into the matching ActionType.

    Raises:
        ParsingError: if the label is not one of the known actions.
    """
    # One entry per label Schwab is known to emit; several labels may map
    # to the same action type.
    known_actions = {
        "Buy": ActionType.BUY,
        "Sell": ActionType.SELL,
        "Sale": ActionType.SELL,
        "MoneyLink Transfer": ActionType.TRANSFER,
        "Misc Cash Entry": ActionType.TRANSFER,
        "Service Fee": ActionType.TRANSFER,
        "Wire Funds": ActionType.TRANSFER,
        "Wire Transfer": ActionType.TRANSFER,
        "Funds Received": ActionType.TRANSFER,
        "Journal": ActionType.TRANSFER,
        "Cash In Lieu": ActionType.TRANSFER,
        "Stock Plan Activity": ActionType.STOCK_ACTIVITY,
        "Deposit": ActionType.STOCK_ACTIVITY,
        "Qualified Dividend": ActionType.DIVIDEND,
        "Cash Dividend": ActionType.DIVIDEND,
        "NRA Tax Adj": ActionType.TAX,
        "NRA Withholding": ActionType.TAX,
        "Foreign Tax Paid": ActionType.TAX,
        "ADR Mgmt Fee": ActionType.FEE,
        "Adjustment": ActionType.ADJUSTMENT,
        "IRS Withhold Adj": ActionType.ADJUSTMENT,
        "Short Term Cap Gain": ActionType.CAPITAL_GAIN,
        "Long Term Cap Gain": ActionType.CAPITAL_GAIN,
        "Spin-off": ActionType.SPIN_OFF,
        "Credit Interest": ActionType.INTEREST,
        "Reinvest Shares": ActionType.REINVEST_SHARES,
        "Reinvest Dividend": ActionType.REINVEST_DIVIDENDS,
        "Wire Funds Received": ActionType.WIRE_FUNDS_RECEIVED,
    }

    action = known_actions.get(label)
    if action is None:
        raise ParsingError("schwab transactions", f"Unknown action: {label}")
    return action


def _round_decimal(num: Decimal) -> Decimal:
# We want enough decimals to cover what Schwab gives us (2 decimals)
# divided by the share-split factor (20), so we keep 4 decimals.
# We don't want more decimals than necessary or we risk converting
# the float number format approximations into Decimals
# (e.g. a number 1.0001 in JSON may become 1.00010001 when parsed
# into float, but we want to get Decimal('1.0001'))
return num.quantize(Decimal(".0001")).normalize()


def _get_decimal_or_default(
row: JsonRowType, key: str, default: Decimal | None = None
) -> Decimal | None:
if key in row and row[key]:
if isinstance(row[key], float):
return _round_decimal(Decimal.from_float(row[key]))

return Decimal(row[key])

return default


def _get_decimal(row: JsonRowType, key: str) -> Decimal:
    """Read ``row[key]`` as a Decimal, using zero when it is missing or falsy."""
    zero = Decimal(0)
    value = _get_decimal_or_default(row, key, zero)
    return value  # type: ignore


def _price_from_str(price_str: str) -> Decimal:
# example: "$1,250.00",
# remove $ sign, and coma thousand separators:
return Decimal(price_str.replace("$", "").replace(",", ""))


class SchwabTransaction(BrokerTransaction):
    """Represent single Schwab transaction."""

    def __init__(
        self,
        row: JsonRowType,
        file: str,
    ) -> None:
        """Create a new SchwabTransaction from a JSON row.

        Only "Deposit" (share vest) and "Sale" rows are supported.

        Args:
            row: one entry of the "transactions" list of the Schwab export.
            file: name of the file being parsed, used in error reports.

        Raises:
            ParsingError: if the action is unknown or not implemented, or
                if the row's transactionDetails do not allow the date,
                price and quantity to be worked out.
        """
        description = row["description"]
        self.raw_action = row["action"]
        action = action_from_str(self.raw_action)
        symbol = row.get("symbol")
        quantity = _get_decimal_or_default(row, "quantitySortValue")
        amount = _get_decimal(row, "amountSortValue")
        fees = _get_decimal(row, "totalCommissionsAndFeesSortValue")
        if self.raw_action == "Deposit":
            details = row["transactionDetails"]
            if len(details) != 1:
                raise ParsingError(
                    file,
                    "Expected a single transactionDetails for a Deposit, but "
                    f"found {len(details)}",
                )
            vest = details[0]
            date = datetime.datetime.strptime(vest["vestDate"], "%m/%d/%Y").date()
            price = _price_from_str(vest["vestFairMarketValue"])
            description = (
                f'Vest from Award Date {vest["awardDate"]} '
                f'(ID {vest["awardName"]})'
            )
        elif self.raw_action == "Sale":
            # Schwab's data export shows the settlement date,
            # whereas HMRC wants the trade date:
            date = (
                datetime.datetime.strptime(row["eventDate"], "%m/%d/%Y").date()
                - SETTLEMENT_DELAY
            ).date()

            details = row["transactionDetails"]
            if not details:
                # Mirror the Deposit validation instead of failing on [0].
                raise ParsingError(
                    file,
                    "Expected at least one transactionDetails for a Sale, "
                    "but found none",
                )

            # Schwab's data export lacks decimals on Sales quantities,
            # so we infer it from the amount and salePrice.
            price_str = details[0]["salePrice"]
            price = _price_from_str(price_str)

            # Schwab only gives us overall transaction amount, and sale price
            # of the sub-transactions. We can only work-out the correct
            # quantity if all sub-transactions have the same price:
            for subtransac in details[1:]:
                if subtransac["salePrice"] != price_str:
                    raise ParsingError(
                        file,
                        "Impossible to work out quantity of sale of date "
                        f"{date} and amount {amount} because different "
                        "sub-transaction have different sale prices",
                    )

            if not price:
                # A zero price would make the division below blow up with a
                # decimal exception instead of a readable parsing error.
                raise ParsingError(
                    file,
                    f"Impossible to work out quantity of sale of date {date} "
                    "because the sale price is zero",
                )

            quantity = (amount + fees) / price
        else:
            raise ParsingError(
                file, f"Parsing for action {self.raw_action} is not implemented!"
            )

        currency = "USD"
        broker = "Charles Schwab"
        super().__init__(
            date,
            action,
            symbol,
            description,
            quantity,
            price,
            fees,
            amount,
            currency,
            broker,
        )

        self._normalize_split()

    def _normalize_split(self) -> None:
        """Ensure past transactions are normalized to split values.

        This is in the context of the 20:1 stock split which happened at
        close on 2022-07-15.
        As of 2022-08-07, Schwab's data exports have some past transactions
        corrected for the 20:1 split on 2022-07-15, whereas others are not.
        """
        split_factor = 20

        # The share price has never been above $175*20=$3500 before 2022-07-15
        # so this price is expressed in pre-split amounts: normalize to post-split
        if (
            self.date <= datetime.date(2022, 7, 15)
            and self.price
            and self.price > 175
            and self.quantity
        ):
            self.price = _round_decimal(self.price / split_factor)
            self.quantity = _round_decimal(self.quantity * split_factor)


def read_schwab_equity_award_json_transactions(
    transactions_file: str,
) -> list[BrokerTransaction]:
    """Read Schwab Equity Award transactions from a JSON export file.

    Args:
        transactions_file: path of the JSON file to read.

    Returns:
        The parsed transactions in the reverse of the file's order, with
        "Journal" and "Wire Transfer" rows left out. An empty list (after
        printing a warning) when the file does not exist.

    Raises:
        ParsingError: if the content is not valid JSON or does not hold a
            "transactions" list.
    """
    # Only opening and reading the file is guarded, so a FileNotFoundError
    # raised while building transactions is not mistaken for a missing input.
    try:
        with Path(transactions_file).open(encoding="utf-8") as json_file:
            try:
                data = json.load(json_file)
            except json.JSONDecodeError as exception:
                raise ParsingError(
                    transactions_file,
                    "Could not parse content as JSON",
                ) from exception
    except FileNotFoundError:
        print(f"WARNING: Couldn't locate Schwab transactions file({transactions_file})")
        return []

    # The top level must be an object: a bare number or list would otherwise
    # fail with a TypeError rather than a parsing error.
    if not isinstance(data, dict) or not isinstance(data.get("transactions"), list):
        raise ParsingError(
            transactions_file,
            "no 'transactions' list found: the JSON data is not "
            "in the expected format",
        )

    # Skip as not relevant for CGT
    skipped_actions = {"Journal", "Wire Transfer"}
    transactions: list[BrokerTransaction] = [
        SchwabTransaction(transac, transactions_file)
        for transac in data["transactions"]
        if transac["action"] not in skipped_actions
    ]
    transactions.reverse()
    return transactions
Loading

0 comments on commit 955d0d4

Please sign in to comment.