Skip to content

Commit

Permalink
Read cost as a Decimal from collected invoices
Browse files Browse the repository at this point in the history
This will read the cost columns from collected invoices as a
Decimal, using the integration between pandas and PyArrow,
which allows pandas to use more data types.[0]

The decimal type defined can represent numbers with up to 12
significant digits, 2 of which are after the decimal point.

[0]. https://pandas.pydata.org/docs/user_guide/pyarrow.html
  • Loading branch information
knikolla committed Apr 25, 2024
1 parent b1f1af1 commit c83466f
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
11 changes: 9 additions & 2 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
import os
import sys
import datetime
from decimal import Decimal

import json
import pandas
import boto3
import pyarrow

# Define a decimal type that can exactly represent numbers with up to 12
# significant digits, 2 of which are after the decimal point.
pa_decimal_type = pyarrow.decimal128(12, 2)

### Invoice field names
INVOICE_DATE_FIELD = "Invoice Month"
Expand Down Expand Up @@ -229,7 +234,9 @@ def merge_csv(files):
"""Merge multiple CSV files and return a single pandas dataframe"""
dataframes = []
for file in files:
dataframe = pandas.read_csv(file)
dataframe = pandas.read_csv(
file, dtype={COST_FIELD: pandas.ArrowDtype(pa_decimal_type)}
)
dataframes.append(dataframe)

merged_dataframe = pandas.concat(dataframes, ignore_index=True)
Expand Down Expand Up @@ -298,7 +305,7 @@ def apply_credits_new_pi(dataframe, old_pi_file):

dataframe[CREDIT_FIELD] = None
dataframe[CREDIT_CODE_FIELD] = None
dataframe[BALANCE_FIELD] = 0
dataframe[BALANCE_FIELD] = Decimal(0)

old_pi_dict = load_old_pis(old_pi_file)

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pandas
pyarrow
boto3

0 comments on commit c83466f

Please sign in to comment.