from src.utils.helper_geo import get_lat_long_centre

import os
import pandas as pd
import geopandas as gpd
import branca.colormap as cm
import folium
from folium.plugins import TimeSliderChoropleth

# Directory holding the processed data, read from the DIR_DATA_PROCESSED
# environment variable.
FOLDER_PROCESSED = os.environ.get('DIR_DATA_PROCESSED')
DATA_CRIME_MAP = 'df_crime_map.pkl'
if FOLDER_PROCESSED is None:
    # Fail early with an actionable message instead of the opaque TypeError
    # that building a path from None would raise.
    raise RuntimeError("Environment variable 'DIR_DATA_PROCESSED' is not set")
# NOTE: unpickling can execute arbitrary code - only load pickles produced by
# this project's own processing pipeline.
df_camden = pd.read_pickle(
    filepath_or_buffer=os.path.join(FOLDER_PROCESSED, DATA_CRIME_MAP))
# drop rows with a missing 'Outcome Sec' value
# (presumably rows whose outcome date was NaT - confirm against the processing step)
df_camden = df_camden.dropna(subset=['Outcome Sec'])

6.1. Explore Crime on a Map

In this section, we further explore crime rates by considering how they compare across wards and across time.

The advantage of mapping this interactively is that we can intuitively and easily show the ward, time and crime category dimensions on one visualisation. If we used other visualisation methods, such as bar plots or distribution charts, the plots would carry too much information and be more difficult to understand.

[Jumping Rivers - A. Curran, Mar 2020]

# colour scale spanning the observed range of 'Crime Rate'
min_colour = min(df_camden['Crime Rate'])
max_colour = max(df_camden['Crime Rate'])
cmap = cm.linear.YlOrRd_09.scale(min_colour, max_colour)
# colour of every row, obtained by passing its crime rate through the scale
df_camden['Colour'] = df_camden['Crime Rate'].map(cmap)

# style dictionary for the choropleth:
# - outer key: position of the ward in ward_list, as a string
# - inner key: the row's 'Outcome Sec' value
# - inner value: colour and opacity to draw the ward with
ward_list = df_camden['Ward Name'].unique().tolist()
ward_idx = range(len(ward_list))

style_dict = {
    str(position): {
        row['Outcome Sec']: {'color': row['Colour'], 'opacity': 0.7}
        for _, row in df_camden[df_camden['Ward Name'] == ward_name].iterrows()
    }
    for position, ward_name in zip(ward_idx, ward_list)
}
# make df with features of each ward (one row per distinct geometry)
gdf_ward = gpd.GeoDataFrame(data=df_camden[['geometry']])
gdf_ward = gdf_ward.drop_duplicates().reset_index()

# Centroids computed directly on a geographic (lat/long) CRS are likely
# incorrect, so re-project to a projected CRS first, take the centroid there,
# then convert the points back to the original CRS for plotting.
# NOTE(review): EPSG:27700 (British National Grid) assumes the wards are in
# Great Britain - confirm, or substitute a projected CRS that fits the data.
centroids = gdf_ward.to_crs(epsg=27700).centroid.to_crs(gdf_ward.crs)
# (y, x) pairs, i.e. (latitude, longitude) when the CRS is geographic
centroid = list(zip(centroids.y, centroids.x))
# reduce the per-ward centroids to the single location the map is centred on
centroid = get_lat_long_centre(geolocations=centroid)
<ipython-input-5-52d019f63e4e>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  centroids = gdf_ward.centroid

Development Note

  • Would like to add crime rates in the pop-up but this is difficult because we need to store the user-input made via the TimeSliderChoropleth() and pass it as a filter to the dataset when we loop over list_iterate.

  • Would like to add Vega time-series plots for each ward if it does not make the visualisation too busy. Will need to consider how we can split by time in style_dict. [Folium Quickstart] and [StackOverflow]

# make map and add colourbar
# - can then add some prediction on crime levels in future
# - add labels of wards
# - add popups of crime level numbers
slider_map = folium.Map(location=centroid,
                        zoom_start=12,
                        max_bounds=True,
                        tiles='cartodbpositron')

# add one marker per ward: the tooltip shows the ward name and the popup shows
# an HTML table of descriptive statistics of the ward's crime rate
feature_group = folium.FeatureGroup("Locations")
# centroids.y is the latitude and centroids.x the longitude; folium markers
# take their location as [lat, lon]
# NOTE(review): this pairing relies on centroids and ward_list being in the
# same order and of the same length (zip silently truncates) - confirm
list_iterate = zip(centroids.y, centroids.x, ward_list)
for lat, lon, name in list_iterate:
    # crime rate of this ward over all time, scaled by 100
    ward_rate = df_camden.loc[df_camden["Ward Name"] == name, "Crime Rate"] * 100
    # summary statistics as a one-column table rounded to 2 decimals
    stats_table = ward_rate.describe().to_frame().round(decimals=2)
    popup_html = ('<strong>Summary stats for all time</strong>\n'
                  + stats_table.to_html())
    feature_group.add_child(folium.Marker(location=[lat, lon],
                                          tooltip=name,
                                          popup=popup_html))

_ = slider_map.add_child(feature_group)

# add time slider for choropleth
_ = TimeSliderChoropleth(data=gdf_ward.to_json(),
                         styledict=style_dict).add_to(slider_map)

# caption the colour bar before attaching it to the map
cmap.caption = "Leaflet Map: Crime Rate per Ward"
_ = cmap.add_to(slider_map)

# last expression of the cell: displays the map in the notebook
slider_map
Make this Notebook Trusted to load map: File -> Trust Notebook