extract_area.py 5.64 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
## Usage: python extract_area.py TILE [RESOLUTION THRESHOLD ALPHA_BOAR_BOAR ALPHA_BOAR_STATIC ALPHA_OTHER_PATHWAYS N1 N2...]

## assuming that RESOLUTION >= max relevant distances
## i.e. >= Ni (neighbourhood distances)
## and >= alpha * sqrt(ln(1/epsilon)) for all alphas (cut-off distance)
## with this assumption, works fine with 8 Moore neighbours
## otherwise revise how adjacent tiles are retrieved but careful with computational cost

import itertools as it
import sys
import time
import numpy as np
import pandas as pd
import scipy.sparse as sp
from scipy.spatial import distance_matrix

TARGET_TILE = int(sys.argv[1])
if len(sys.argv) > 2:
    C = int(sys.argv[2])
    epsilon = float(sys.argv[3])
    alpha_boar_boar = int(sys.argv[4])
    alpha_boar_static = int(sys.argv[5])
    alpha_other_pathways = int(sys.argv[6])
    neighbourhoods = [ int(sys.argv[i]) for i in range(7, len(sys.argv))]
else:
    C = 15000
    alpha_boar_boar = 2000
    alpha_boar_static = 1000
    alpha_other_pathways = 1000
    epsilon = 1e-9
    neighbourhoods = [3000, 10000, 15000]


print("Extracting tile", TARGET_TILE, "with resolution =", C, "m and pruning threshold ", epsilon)
print("Alpha values:", alpha_boar_boar, alpha_boar_static, alpha_boar_static)
print("Neighbour distances (m):", *neighbourhoods)

start = time.perf_counter()
print('Loading pig herd info')
# load pig herds info
units = pd.read_csv('herds.csv', sep=',', header=0).drop(columns=['biosecurity','is_commercial','is_outdoor','multisite','production','size'])
nb_pigherds = units.shape[0]

print('Loading wildboar info')
# load wildboars info
units = pd.concat([units, pd.read_csv('boars.csv', sep=',', header=0).drop(columns=['Zone'])],
                  ignore_index = True)
units = units.astype('int32')
nb_wildboars = units.shape[0] - nb_pigherds

print('Filtering points')
# prepare the list of points
points = units[['X', 'Y']].values

Xmin, Xmax = units['X'].min(), units['X'].max()
Ymin, Ymax = units['Y'].min(), units['Y'].max()

W = int(np.ceil((Xmax - Xmin) / C))
H = int(np.ceil((Ymax - Ymin) / C))

units['row'] = (np.floor((units['Y'] - Ymin) / C)).astype(int)
units['col'] = (np.floor((units['X'] - Xmin) / C)).astype(int)
units['tile'] = units['col'] + W * units['row']

print('Loading target tiles')
COL = TARGET_TILE % W
ROW = TARGET_TILE // W

ADJACENT_TILES = [(COL + i) + (ROW + j) * W
                  for j in [-1, 0, 1]
                  for i in [-1, 0, 1]
                  if (i != 0 or j != 0)]

tile_idx,  = (units['tile'] == TARGET_TILE).to_numpy().nonzero()
ph_idx = tile_idx[tile_idx < nb_pigherds]
wb_idx = tile_idx[tile_idx >= nb_pigherds]
nb_ph_tile, = ph_idx.shape

tile = points[tile_idx, :]
tile_ph = points[ph_idx, :]
tile_wb = points[wb_idx, :]

adjacent_idx = np.concatenate([tile_idx] +\
                              [(units['tile'] == other).to_numpy().nonzero()[0]
                               for other in ADJACENT_TILES])
nph_idx = adjacent_idx[adjacent_idx < nb_pigherds]
nwb_idx = adjacent_idx[adjacent_idx >= nb_pigherds]
nb_ph_adj, = nph_idx.shape

neighbour_tiles = points[adjacent_idx, :]
n_tiles_ph = points[nph_idx, :]
n_tiles_wb = points[nwb_idx, :]

### WARNING: slow operation !!! (about 3.10^6 coordinates)
# coords = tuple(zip(*it.product(tile_idx, adjacent_idx)))
irow, icol = np.meshgrid(tile_idx, adjacent_idx)
irow, icol = irow.T, icol.T

irowph, icolph = np.meshgrid(ph_idx, nph_idx)
irowph, icolph = irowph.T, icolph.T

irowwb, icolwb = np.meshgrid(wb_idx-nb_pigherds, nwb_idx-nb_pigherds)
irowwb, icolwb = irowwb.T, icolwb.T

nrowph, ncolph = np.meshgrid(ph_idx, nph_idx)
nrowph, ncolph = nrowph.T, ncolph.T

print('Computing kernel other_pathways (PH/PH)')
# Kernel for herd-herd indirect transmission
d_tile = distance_matrix(tile_ph, n_tiles_ph)
kernel = np.exp(- (d_tile / alpha_other_pathways)**2)
kernel[kernel < epsilon] = 0
np.fill_diagonal(kernel, 0)
kernel_other_whole = sp.lil_matrix((nb_pigherds,nb_pigherds))
kernel_other_whole[irowph, icolph] = kernel

print('Computing kernel boar-to-boar (WB/WB)')
# Kernel for direct transmission between I and S alive boars
d_tile = distance_matrix(tile_wb, n_tiles_wb)
kernel = np.exp(- (d_tile / alpha_boar_boar)**2)
kernel[kernel < epsilon] = 0
np.fill_diagonal(kernel, 0)
kernel_boars_whole = sp.lil_matrix((nb_wildboars,nb_wildboars))
kernel_boars_whole[irowwb, icolwb] = kernel

print('Computing kernel boar-static(PH+WB/PH+WB)')
# Kernel boar-to-static epid unit (I/S wild boar <--> boar carcass or pig herd)
d_tile = distance_matrix(tile, neighbour_tiles)
kernel = np.exp(- (d_tile / alpha_boar_static)**2)
kernel[kernel < epsilon] = 0
np.fill_diagonal(kernel, 0)
kernel_static_whole = sp.lil_matrix((nb_pigherds+nb_wildboars,nb_pigherds+nb_wildboars))
kernel_static_whole[irow, icol] = kernel


print('Computing neighbours')
# Distance matrix between pig herds on tile and pig herds in neighbourhood
d_tile = distance_matrix(tile_ph, n_tiles_ph)
neighbours = np.zeros(d_tile.shape, dtype='int32')
for dist in sorted(neighbourhoods, reverse=True):
    neighbours[d_tile <= dist] = dist
np.fill_diagonal(neighbours, 0)
neighbours_whole = sp.lil_matrix((nb_pigherds,nb_pigherds), dtype='int32')
neighbours_whole[nrowph, ncolph] = neighbours


print('Saving')
sp.save_npz('tiles_{}/kernel_boars_{}_{}'.format(C, alpha_boar_boar, TARGET_TILE), kernel_boars_whole.tocsr())
sp.save_npz('tiles_{}/kernel_static_{}_{}'.format(C, alpha_boar_static, TARGET_TILE), kernel_static_whole.tocsr())
sp.save_npz('tiles_{}/kernel_other_{}_{}'.format(C, alpha_other_pathways, TARGET_TILE), kernel_other_whole.tocsr())

sp.save_npz('tiles_{}/neighbours_{}'.format(C, TARGET_TILE), neighbours_whole.tocsr())

stop = time.perf_counter()
print('Computation finished in {:.2f} s'.format(stop-start))