-
Notifications
You must be signed in to change notification settings - Fork 0
/
scale.py
136 lines (119 loc) · 4.62 KB
/
scale.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import numpy as np
from scipy.integrate import trapezoid
from scipy.signal import convolve2d
def scale_data(data, scale):
"""Change the scale of given data.
Input:
data:
Two-dimensional matrix of numbers in [0, 1].
It is assumed that these numbers represent
fractions of people with certain
characteristic.
scale:
Scale of newly made geographical units. Units
are assumed to take square shape (i.e., their
size is [scale x scale]).
Output:
scaled_data:
Two-dimensional matrix which contains
fractions of people with certain
characteristic when viewed from a larger
scale.
Important:
Note that the scaled_data will be of smaller size
as geographical units are not allowed to overlap.
Note that the border regions are taken into
account even if they from incomplete (contain less
than scale*scale smallest units).
"""
out_x = np.ceil(data.shape[0] / scale).astype(int)
out_y = np.ceil(data.shape[1] / scale).astype(int)
out = np.zeros((out_x, out_y))
for ix, x in enumerate(np.arange(0, data.shape[0], scale)):
for iy, y in enumerate(np.arange(0, data.shape[1], scale)):
out[ix, iy] = np.mean(data[x : x + scale, y : y + scale])
return out
def auto_scale(data, analysis_fn=np.std):
"""Perform analysis at automatically selected scales.
Input:
data:
Two-dimensional matrix of numbers in [0, 1].
It is assumed that these numbers represent
fractions of people with certain
characteristic.
analysis_fn:
The function used to perform analysis.
Output:
unit_sizes:
Unit sizes at which analysis was performed.
values:
Values obtained by analysis_fn when analyzing
scaled data. Normalized by the first value.
"""
scales = (2 ** np.arange(0, np.floor(np.log2(np.min(data.shape))))).astype(int)
vals = np.array([analysis_fn(scale_data(data, scale)) for scale in scales])
if vals[0] != 0:
rel_vals = vals / vals[0]
else:
rel_vals = np.zeros(vals.shape)
rel_vals[0] = 1
unit_sizes = scales**2
return (unit_sizes, rel_vals)
def calculate_scaling_index(unit_sizes, data_vals):
"""Calculate scaling index in comparison to some baseline.
Input:
unit_sizes:
Unit sizes at which analysis as performed.
data_vals:
Analysis values obtained for the data.
Output:
index:
Value between -1 and 1. The closer values is
to 0, the less difference between the
baseline model and data.
"""
min_vals = np.zeros(unit_sizes.shape)
min_vals[0] = 1
minimal_area = trapezoid(min_vals, unit_sizes)
maximal_area = unit_sizes[-1] - 1
baseline_area = trapezoid(1 / np.sqrt(unit_sizes), unit_sizes)
data_area = trapezoid(data_vals, unit_sizes)
index = 0
if data_area < baseline_area:
# In this case data_area will be in [minimal_area,
# baseline_area] interval. We would like that
# data_area=minimal_area would imply index=-1, while
# data_area=baseline_area would imply index=0.
index = (data_area - minimal_area) / (baseline_area - minimal_area) - 1
else:
# In this case data_area will be in [baseline_area,
# unit_sizes[-1]-1] interval. We would like that
# data_area=baseline_area would imply index=0, while
# data_area=maximal_area would imply index=1.
index = (data_area - baseline_area) / (maximal_area - baseline_area)
return index
def scale_data_convolve(data, scale):
"""Change the scale of given data using convolution.
Input:
data:
Two-dimensional matrix of numbers in [0, 1].
It is assumed that these numbers represent
fractions of people with certain
characteristic.
scale:
Scale of newly made geographical units. Units
are assumed to take square shape (i.e., their
size is [scale x scale]).
Output:
scaled_data:
Two-dimensional matrix which contains
fractions of people with certain
characteristic when viewed from a larger
scale.
Important:
Note that the newly made geographical units are
allowed to overlap.
"""
averaging_kernel = np.ones((scale, scale)) / (scale**2)
scaled_data = convolve2d(data, averaging_kernel, mode="same", boundary="wrap")
return scaled_data