Prepare georeferenced data for artificial disease outbreak in Europe¶
In this notebook, geospatial data from a shapefile is loaded, the relevant attributes are extracted, and the result is saved to a new shapefile.
%matplotlib inline
import geopandas as gp
import pandas as pd
The geospatial vector dataset shapefile was downloaded from open access NaturalEarthData, which provides free vector and raster map data.
# Location of the Natural Earth countries shapefile on disk.
shp = 'Shapefile/ne_110m_admin_0_countries.shp'
# Read the shapefile into a GeoDataFrame.
geo_df = gp.read_file(shp)
# Show the first record as a quick check of the load.
geo_df.head(1)
featurecla | scalerank | LABELRANK | SOVEREIGNT | SOV_A3 | ADM0_DIF | LEVEL | TYPE | TLC | ADMIN | ... | FCLASS_TR | FCLASS_ID | FCLASS_PL | FCLASS_GR | FCLASS_IT | FCLASS_NL | FCLASS_SE | FCLASS_BD | FCLASS_UA | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Admin-0 country | 1 | 6 | Fiji | FJI | 0 | 2 | Sovereign country | 1 | Fiji | ... | None | None | None | None | None | None | None | None | None | MULTIPOLYGON (((180.00000 -16.06713, 180.00000... |
1 rows × 169 columns
Attributes in shapefile¶
# List the attribute (column) names available in the loaded shapefile.
geo_df.columns
Index(['featurecla', 'scalerank', 'LABELRANK', 'SOVEREIGNT', 'SOV_A3',
'ADM0_DIF', 'LEVEL', 'TYPE', 'TLC', 'ADMIN',
...
'FCLASS_TR', 'FCLASS_ID', 'FCLASS_PL', 'FCLASS_GR', 'FCLASS_IT',
'FCLASS_NL', 'FCLASS_SE', 'FCLASS_BD', 'FCLASS_UA', 'geometry'],
dtype='object', length=169)
Take only European countries and exclude Russia¶
# Build the European subset as a single non-inplace chain. The original
# applied inplace operations to a boolean-mask slice of geo_df, which raises
# pandas' SettingWithCopyWarning; every step below returns a new frame.
geo_df_EU = (
    geo_df[geo_df["REGION_UN"] == "Europe"]  # rows whose UN region is Europe
    .set_index("ADMIN")                      # index by country name ...
    .drop('Russia', axis=0)                  # ... so Russia can be dropped by label
    .reset_index()                           # ADMIN back to a column, index 0..n-1
)
/home/eneko/.miniconda3/envs/cook/lib/python3.7/site-packages/pandas/core/frame.py:4913: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
errors=errors,
Retrieve relevant data¶
# Only the geometry, the population estimate and the country name are needed.
relevant_columns = ['geometry', 'POP_EST', 'ADMIN']
geo_df_EU = geo_df_EU[relevant_columns]
# Preview the reduced table.
geo_df_EU.head()
geometry | POP_EST | ADMIN | |
---|---|---|---|
0 | MULTIPOLYGON (((15.14282 79.67431, 15.52255 80... | 5347896.0 | Norway |
1 | MULTIPOLYGON (((-51.65780 4.15623, -52.24934 3... | 67059887.0 | France |
2 | POLYGON ((11.02737 58.85615, 11.46827 59.43239... | 10285453.0 | Sweden |
3 | POLYGON ((28.17671 56.16913, 29.22951 55.91834... | 9466856.0 | Belarus |
4 | POLYGON ((31.78599 52.10168, 32.15944 52.06125... | 44385155.0 | Ukraine |
Compute the population density, i.e. the number of people per unit of area.
# The geometries are stored in a geographic CRS (longitude/latitude), so
# calling .area on them directly yields squared degrees, which are not a
# real area and are distorted by latitude. Re-project to an equal-area CRS
# first: EPSG:6933 is a global cylindrical equal-area projection in metres.
area_km2 = geo_df_EU['geometry'].to_crs('EPSG:6933').area / 1e6  # m^2 -> km^2
# Population density in people per square kilometre.
geo_df_EU['POP_DENSITY'] = geo_df_EU['POP_EST'] / area_km2
/home/eneko/.miniconda3/envs/cook/lib/python3.7/site-packages/ipykernel_launcher.py:1: UserWarning: Geometry is in a geographic CRS. Results from 'area' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
"""Entry point for launching an IPython kernel.
Normalize the population density into an infection rate (`inf_rate`) between 0 and 1
from sklearn import preprocessing

# Linearly rescale the densities so the smallest maps to 0 and the largest to 1.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
# The scaler expects a 2-D (n_samples, n_features) array, hence the reshape;
# ravel() flattens the single-column result back to 1-D.
x_scaled = min_max_scaler.fit_transform(
    geo_df_EU['POP_DENSITY'].to_numpy().reshape(-1, 1)
).ravel()
# Assign the array positionally. Wrapping it in pd.Series would align on the
# index and silently produce NaN/misassigned rows whenever the frame's index
# is not exactly 0..n-1.
geo_df_EU['inf_rate'] = x_scaled
geo_df_EU.head()
geometry | POP_EST | ADMIN | POP_DENSITY | inf_rate | |
---|---|---|---|---|---|
0 | MULTIPOLYGON (((15.14282 79.67431, 15.52255 80... | 5347896.0 | Norway | 59095.218795 | 0.012681 |
1 | MULTIPOLYGON (((-51.65780 4.15623, -52.24934 3... | 67059887.0 | France | 923490.631806 | 0.276615 |
2 | POLYGON ((11.02737 58.85615, 11.46827 59.43239... | 10285453.0 | Sweden | 129464.357267 | 0.034167 |
3 | POLYGON ((28.17671 56.16913, 29.22951 55.91834... | 9466856.0 | Belarus | 334504.027829 | 0.096774 |
4 | POLYGON ((31.78599 52.10168, 32.15944 52.06125... | 44385155.0 | Ukraine | 628756.876391 | 0.186621 |
# Draw the map, colouring each geometry by its 'inf_rate' value.
geo_df_EU.plot(column='inf_rate');
Give the columns easily understandable names and remove unnecessary columns
# The raw density was only needed to derive 'inf_rate'; remove it.
geo_df_EU.drop(columns=['POP_DENSITY'], inplace=True)
# Replace the shapefile attribute names with more readable ones.
readable_names = {'ADMIN': 'country', 'POP_EST': 'population'}
geo_df_EU.rename(columns=readable_names, inplace=True)
Save the results into a new shapefile
# Write the prepared table to disk in ESRI Shapefile format.
geo_df_EU.to_file(
    filename='geo_df_EU.shp',
    driver="ESRI Shapefile",
)
# Display the final table.
geo_df_EU
geometry | population | country | inf_rate | |
---|---|---|---|---|
0 | MULTIPOLYGON (((15.14282 79.67431, 15.52255 80... | 5347896.0 | Norway | 0.012681 |
1 | MULTIPOLYGON (((-51.65780 4.15623, -52.24934 3... | 67059887.0 | France | 0.276615 |
2 | POLYGON ((11.02737 58.85615, 11.46827 59.43239... | 10285453.0 | Sweden | 0.034167 |
3 | POLYGON ((28.17671 56.16913, 29.22951 55.91834... | 9466856.0 | Belarus | 0.096774 |
4 | POLYGON ((31.78599 52.10168, 32.15944 52.06125... | 44385155.0 | Ukraine | 0.186621 |
5 | POLYGON ((23.48413 53.91250, 23.52754 53.47012... | 37970874.0 | Poland | 0.279088 |
6 | POLYGON ((16.97967 48.12350, 16.90375 47.71487... | 8877067.0 | Austria | 0.260907 |
7 | POLYGON ((22.08561 48.42226, 22.64082 48.15024... | 9769949.0 | Hungary | 0.266325 |
8 | POLYGON ((26.61934 48.22073, 26.85782 48.36821... | 2657637.0 | Moldova | 0.206089 |
9 | POLYGON ((28.23355 45.48828, 28.67978 45.30403... | 19356544.0 | Romania | 0.208615 |
10 | POLYGON ((26.49433 55.61511, 26.58828 55.16718... | 2786844.0 | Lithuania | 0.088953 |
11 | POLYGON ((27.28818 57.47453, 27.77002 57.24426... | 1912789.0 | Latvia | 0.056778 |
12 | POLYGON ((27.98113 59.47537, 27.98112 59.47537... | 1326590.0 | Estonia | 0.053291 |
13 | POLYGON ((14.11969 53.75703, 14.35332 53.24817... | 83132799.0 | Germany | 0.547376 |
14 | POLYGON ((22.65715 44.23492, 22.94483 43.82379... | 6975761.0 | Bulgaria | 0.170384 |
15 | MULTIPOLYGON (((26.29000 35.29999, 26.16500 35... | 10716322.0 | Greece | 0.232713 |
16 | POLYGON ((21.02004 40.84273, 20.99999 40.58000... | 2854191.0 | Albania | 0.268249 |
17 | POLYGON ((16.56481 46.50375, 16.88252 46.38063... | 4067500.0 | Croatia | 0.183671 |
18 | POLYGON ((9.59423 47.52506, 9.63293 47.34760, ... | 8574832.0 | Switzerland | 0.475913 |
19 | POLYGON ((6.04307 50.12805, 6.24275 49.90223, ... | 619896.0 | Luxembourg | 0.622395 |
20 | POLYGON ((6.15666 50.80372, 6.04307 50.12805, ... | 11484055.0 | Belgium | 0.910184 |
21 | POLYGON ((6.90514 53.48216, 7.09205 53.14404, ... | 17332850.0 | Netherlands | 1.000000 |
22 | POLYGON ((-9.03482 41.88057, -8.67195 42.13469... | 10269417.0 | Portugal | 0.314522 |
23 | POLYGON ((-7.45373 37.09779, -7.53711 37.42890... | 47076781.0 | Spain | 0.264485 |
24 | POLYGON ((-6.19788 53.86757, -6.03299 53.15316... | 4941444.0 | Ireland | 0.186591 |
25 | MULTIPOLYGON (((10.44270 46.89355, 11.04856 46... | 60297396.0 | Italy | 0.525439 |
26 | MULTIPOLYGON (((9.92191 54.98310, 9.28205 54.8... | 5818553.0 | Denmark | 0.282656 |
27 | MULTIPOLYGON (((-6.19788 53.86757, -6.95373 54... | 66834405.0 | United Kingdom | 0.591287 |
28 | POLYGON ((-14.50870 66.45589, -14.73964 65.808... | 361313.0 | Iceland | 0.000000 |
29 | POLYGON ((13.80648 46.50931, 14.63247 46.43182... | 2087946.0 | Slovenia | 0.281129 |
30 | POLYGON ((28.59193 69.06478, 28.44594 68.36461... | 5520314.0 | Finland | 0.021063 |
31 | POLYGON ((22.55814 49.08574, 22.28084 48.82539... | 5454073.0 | Slovakia | 0.284090 |
32 | POLYGON ((15.01700 51.10667, 15.49097 50.78473... | 10669709.0 | Czechia | 0.316024 |
33 | POLYGON ((18.56000 42.65000, 17.67492 43.02856... | 3301000.0 | Bosnia and Herzegovina | 0.171569 |
34 | POLYGON ((22.38053 42.32026, 22.88137 41.99930... | 2083459.0 | North Macedonia | 0.229644 |
35 | POLYGON ((18.82982 45.90887, 18.82984 45.90888... | 6944975.0 | Republic of Serbia | 0.241080 |
36 | POLYGON ((20.07070 42.58863, 19.80161 42.50009... | 622137.0 | Montenegro | 0.123049 |
37 | POLYGON ((20.59025 41.85541, 20.52295 42.21787... | 1794248.0 | Kosovo | 0.439454 |