Skip to content

Commit 46bb42c

Browse files
author
virgesmith
committed
more tests for custom snpp; better checks for single values; update doc #37
1 parent e30dea9 commit 46bb42c

File tree

6 files changed

+45
-15
lines changed

6 files changed

+45
-15
lines changed

README.md

+23-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,29 @@
88

99
# ukpopulation: UK Demographic Projections
1010

11-
> ## Latest news: 1.1 release
11+
> ## Latest news: 1.2 release
12+
> - adds support for custom subnational population projections
13+
> ### Custom SNPP Data
14+
> An externally generated SNPP dataset (e.g. from [simim](https://github.com/nismod/simim)) can be registered with the `ukpopulation` package and used as if it were the standard ONS/StatsWales/NRScotland/NISRA projection:
15+
> ```python3
16+
> >>> import ukpopulation.customsnppdata as CustomSNPPData
17+
> >>> customdata = pd.read_csv("custom_snpp.csv")
18+
> >>> customdata.head()
19+
> GEOGRAPHY_CODE GENDER C_AGE OBS_VALUE PROJECTED_YEAR_NAME
20+
> 0 E06000005 1 0 603.0 2018
21+
> 1 E06000005 1 1 600.0 2018
22+
> 2 E06000005 1 2 624.0 2018
23+
> 3 E06000005 1 3 636.0 2018
24+
> 4 E06000005 1 4 661.0 2018
25+
> >>> CustomSNPPData.register_custom_projection("custom_snpp", customdata, "cache_directory")
26+
> Writing custom SNPP custom_snpp to cache_directory/ukpopulation_custom_snpp_custom_snpp.csv
27+
> >>> CustomSNPPData.list_custom_projections("cache_directory")
28+
> ['custom_snpp']
29+
> >>>
30+
> ```
31+
> The external dataset must follow the format and column-name conventions shown above, but can also contain extra data if required for other uses. The `GENDER` column should take only the values 1 (male) or 2 (female); the `C_AGE` column should contain the range 0-90 inclusive (90 meaning 90 or over).
32+
33+
> ## 1.1 release
1234
> - adds UK household projections
1335
> - initial support for custom SNPP variants
1436
> - better consistency across the MYE/NPP/SNPP APIs (breaks backwards compatibility)

tests/test_all.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,14 @@ def test_snpp_custom_projection(self):
252252
self.assertEqual(len(agg), 2)
253253
self.assertAlmostEqual(agg.OBS_VALUE.sum(), 30760.0, 5) # remember this is population under 46
254254

255-
255+
# test extrapolagg is equivalent to extrapolate + external agg
256+
years = range(custom.max_year()-1, custom.max_year() + 2)
257+
ext = utils.aggregate(custom.extrapolate(self.npp, "E06000001", years), ["GENDER", "C_AGE"])
258+
extagg = custom.extrapolagg(["GENDER", "C_AGE"], self.npp, "E06000001", years)
259+
self.assertTrue(ext.equals(extagg))
260+
self.assertEqual(len(ext.GEOGRAPHY_CODE.unique()), 1)
261+
self.assertEqual(ext.GEOGRAPHY_CODE.unique()[0], "E06000001")
262+
self.assertAlmostEqual(ext.OBS_VALUE.sum(), 279841.6197443956, 5)
256263

257264
# test datasets have consistent ranges
258265
def test_consistency(self):

ukpopulation/customsnppdata.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,13 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
6767

6868
if years is None:
6969
years=range(self.min_year(), self.max_year()+1)
70-
if isinstance(years, int):
70+
if np.isscalar(years):
7171
years = [years]
7272

73-
if isinstance(ages, int):
73+
if np.isscalar(ages):
7474
ages = [ages]
7575

76-
if isinstance(genders, int):
76+
if np.isscalar(genders):
7777
genders = [genders]
7878

7979
# check for any codes requested that werent present
@@ -111,16 +111,16 @@ def extrapolate(self, npp, geog_codes, year_range):
111111
for country in geog_codes:
112112
if not geog_codes[country]: continue
113113

114-
max_year = self.max_year()
115-
last_year = self.filter(geog_codes[country], max_year)
114+
maxyear = self.max_year()
115+
last_year = self.filter(geog_codes[country], maxyear)
116116

117-
(in_range, ex_range) = utils.split_range(year_range, max_year)
117+
(in_range, ex_range) = utils.split_range(year_range, maxyear)
118118
# years that dont need to be extrapolated
119119
all_years = self.filter(geog_codes[country], in_range) if in_range else pd.DataFrame()
120120

121121
for year in ex_range:
122122
data = last_year.copy()
123-
scaling = npp.year_ratio("ppp", country, max_year, year)
123+
scaling = npp.year_ratio("ppp", country, maxyear, year)
124124
data = data.merge(scaling[["GENDER", "C_AGE", "OBS_VALUE"]], on=["GENDER", "C_AGE"])
125125
data["OBS_VALUE"] = data.OBS_VALUE_x * data.OBS_VALUE_y
126126
data.PROJECTED_YEAR_NAME = year

ukpopulation/myedata.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
MYEData - wrapper around Mid-Year Estimate data by LAD, SYoA and gender
33
"""
44

5+
import numpy as np
56
import pandas as pd
67
import ukcensusapi.Nomisweb as Api
78
import ukpopulation.utils as utils
@@ -47,9 +48,9 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
4748
# ensure array inputs
4849
if isinstance(geog_codes, str):
4950
geog_codes = [geog_codes]
50-
if isinstance(ages, int):
51+
if np.isscalar(ages):
5152
ages = [ages]
52-
if isinstance(genders, int):
53+
if np.isscalar(genders):
5354
genders = [genders]
5455

5556
result = pd.DataFrame()

ukpopulation/nppdata.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ def detail(self, variant_name, geog, years=None, ages=range(0,91), genders=[1,2]
9292
"""
9393
Return a subset of the raw data
9494
"""
95-
if isinstance(ages, int):
95+
if np.isscalar(ages):
9696
ages = [ages]
9797

98-
if isinstance(genders, int):
98+
if np.isscalar(genders):
9999
genders = [genders]
100100

101101
if not variant_name in NPPData.VARIANTS:

ukpopulation/snppdata.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,10 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
6868
if isinstance(geog_codes, str):
6969
geog_codes = [geog_codes]
7070

71-
if isinstance(ages, int):
71+
if np.isscalar(ages):
7272
ages = [ages]
7373

74-
if isinstance(genders, int):
74+
if np.isscalar(genders):
7575
genders = [genders]
7676

7777
countries = utils.country(geog_codes)

0 commit comments

Comments
 (0)