Skip to content

Commit

Permalink
set_index on dataframe and groupby sum on duplicated index values (#93)
Browse files Browse the repository at this point in the history
* set_index on dataframe and groupby sum on duplicated index values

* fixed testcase
  • Loading branch information
veenstrajelmer authored Apr 24, 2024
1 parent 054566c commit 94f921f
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 6 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
History
=======

UNRELEASED
----------
* avoid duplicated periods in the dataframe returned by `ddlpy.measurements_amount()` in https://github.com/Deltares/ddlpy/pull/93

0.4.0 (2024-04-08)
------------------
* added `catalog_filter` argument to `ddlpy.locations()` to enable retrieving the extended catalog in https://github.com/Deltares/ddlpy/pull/87
Expand Down
8 changes: 5 additions & 3 deletions ddlpy/ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,13 @@ def measurements_amount(location, start_date, end_date, period="Jaar"):
df["Groeperingsperiode.Dag"].apply(lambda x: f"{x:02d}"))

# select columns from dataframe and append to list
df = df[["Groeperingsperiode","AantalMetingen"]]
df = df.set_index("Groeperingsperiode")
df = df[["AantalMetingen"]]
df_list.append(df)

# concatenate
amount_all = pd.concat(df_list).sort_values("Groeperingsperiode").reset_index(drop=True)
# concatenate and sum duplicated index
amount_all = pd.concat(df_list).sort_index()
amount_all = amount_all.groupby(amount_all.index).sum()
return amount_all


Expand Down
6 changes: 3 additions & 3 deletions tests/test_ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,13 @@ def test_measurements_amount(location):
end_date = dt.datetime(1953, 4, 5)
data_amount_dag = ddlpy.measurements_amount(location, start_date=start_date, end_date=end_date, period="Dag")
assert data_amount_dag.shape[0] > 50
assert data_amount_dag["Groeperingsperiode"].str.len().iloc[0] == 10
assert data_amount_dag.index.str.len()[0] == 10
data_amount_maand = ddlpy.measurements_amount(location, start_date=start_date, end_date=end_date, period="Maand")
assert data_amount_maand.shape[0] == 4
assert data_amount_maand["Groeperingsperiode"].str.len().iloc[0] == 7
assert data_amount_maand.index.str.len()[0] == 7
data_amount_jaar = ddlpy.measurements_amount(location, start_date=start_date, end_date=end_date, period="Jaar")
assert data_amount_jaar.shape[0] == 1
assert data_amount_jaar["Groeperingsperiode"].str.len().iloc[0] == 4
assert data_amount_jaar.index.str.len()[0] == 4


def test_measurements_latest(location):
Expand Down

0 comments on commit 94f921f

Please sign in to comment.