-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathcheck_consistent_measures.py
More file actions
70 lines (62 loc) · 2.38 KB
/
check_consistent_measures.py
File metadata and controls
70 lines (62 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import numpy as np
def check_consistent_measures(
    data: pd.DataFrame,
    geography_col: str = "Org_Level",
    measure_col: str = "Measure",
    measures_set: set = set(),
) -> bool:
    """
    Check that every geography level reports exactly the same set of measures.

    The measures found within each geography level are compared, as a set,
    against a reference set. The check passes only when every level's set is
    *equal* to the reference set, so both missing and unexpected measures
    cause a failure.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame of data to check. It must not contain any missing values,
        in any column.
    geography_col : str, default = "Org_Level"
        Column name for the geography level.
    measure_col : str, default = "Measure"
        Column name for measure
    measures_set : set, default = set()
        Set of measures that should be in every geography level. If empty, the
        set of all measures present in ``data[measure_col]`` is presumed to be
        correct and used as the reference.

    Returns
    -------
    bool
        Whether the checks have been passed.

    Raises
    ------
    ValueError
        If ``data`` contains missing values (the message lists their
        positional ``(row, column)`` locations), if ``geography_col`` or
        ``measure_col`` is not a string, or if ``measures_set`` is not a set.
    KeyError
        If ``geography_col`` or ``measure_col`` is not a column of ``data``.

    Examples
    --------
    >>> check_consistent_measures(
    ...     pd.DataFrame({
    ...         "Geog" : ["National" ,"National", "Region", "Region", "Local", "Local",],
    ...         "measure" : ["m1", "m2", "m1", "m2", "m1", "m2",],
    ...         "Value_Unsuppressed" : [4, 2, 2, 1, 2, 1,],
    ...     }),
    ...     geography_col = "Geog",
    ...     measure_col = "measure",
    ...     measures_set = set({"m1", "m2"}),
    ... )
    True
    >>> check_consistent_measures(
    ...     pd.DataFrame({
    ...         "Org_Level" : ["National" ,"National", "Region", "Region", "Local", "Local",],
    ...         "Measure" : ["m1", "m3", "m1", "m2", "m1", "m2",],
    ...         "Value_Unsuppressed" : [4, 2, 2, 1, 2, 1,],
    ...     })
    ... )
    False
    """
    # NOTE: the ``set()`` default is safe here because it is only ever read,
    # never mutated, inside this function.

    # Compute the missing-value mask once and reuse it for the error message.
    missing_mask = data.isna().values
    if missing_mask.any():
        # ``tolist()`` yields plain Python ints, so the message reads
        # ``[(row, col), ...]`` regardless of how the installed NumPy version
        # prints its scalar types.
        locations = [tuple(pos) for pos in np.argwhere(missing_mask).tolist()]
        raise ValueError(f"Missing values at locations {locations}")

    if not isinstance(geography_col, str) or not isinstance(measure_col, str):
        raise ValueError("Please input strings for column indexes.")

    if not isinstance(measures_set, set):
        raise ValueError("Please input a set object for measures")

    if geography_col not in data.columns or measure_col not in data.columns:
        raise KeyError("Check column names correspond to the DataFrame.")

    # Every geography level should have the same set of measures as the global set.
    global_set = measures_set if measures_set else set(data[measure_col].unique())
    subsets = data.groupby(geography_col).agg({measure_col: "unique"})
    return all(set(measures) == global_set for measures in subsets[measure_col])