In [None]:
import os
import pandas as pd
import geopandas as gpd

# Folder holding the raw data files, taken from the DIR_DATA_RAW environment
# variable (None when the variable is not set).
FOLDER_RAW = os.environ.get("DIR_DATA_RAW")

# File names of the raw datasets loaded by this notebook.
DATA_CRIME = "On_Street_Crime_In_Camden.csv"
DATA_POP = "Population_20Projections_20_latest_20GLA_20set_.xlsx"
DATA_MAP = "geo_export_aba7f1fe-addd-4eff-a2f6-fa63b09e4d6f.shp"
DATA_LIGHT = "geo_export_e87e8b43-cf73-48e4-ad5c-692f56b45394.shp"

# Number of rows shown in each preview, and the seed that keeps the previews
# reproducible between runs.
SAMPLE_SIZE, SEED = 5, 42

In [None]:
# Fail fast with a clear message: without DIR_DATA_RAW every path below would be
# built from None (or from an empty prefix) and fail with a confusing error.
if not FOLDER_RAW:
    raise RuntimeError("Environment variable DIR_DATA_RAW is not set; "
                       "it must point to the folder containing the raw data files.")

# On-street crime records; the two listed columns are parsed as dates on load.
df_crime = pd.read_csv(filepath_or_buffer=os.path.join(FOLDER_RAW, DATA_CRIME),
                       parse_dates=["Outcome Date", "Epoch"])

# Population projections: read columns B, D and K:P of the summary sheet, skip the
# first 5 rows and keep the next 18. Column names are supplied explicitly because
# the sheet is read without a header row.
df_pop = pd.read_excel(io=os.path.join(FOLDER_RAW, DATA_POP),
                       sheet_name="WardP Summry",
                       header=None,
                       names=['Ward Code',
                              'Ward Name',
                              '2015',
                              '2016',
                              '2017',
                              '2018',
                              '2019',
                              '2020'],
                       usecols="B,D,K:P",
                       skiprows=5,
                       nrows=18)

# The ward boundary and street lighting shapefiles each live in their own sub-folder.
shp_ward = gpd.read_file(filename=os.path.join(FOLDER_RAW, "camden_ward_boundary", DATA_MAP))
shp_light = gpd.read_file(filename=os.path.join(FOLDER_RAW, "camden_street_lighting", DATA_LIGHT))

# Viewing the data
Below we display how the data looks in its raw form.

## On Street Crime in Camden
Key columns to take further are:
- Outcome Date
- Category
- Ward Name

For further development, we would also like to include:
- Street Name
- Outcome Category
- Longitude
- Latitude

In [None]:
# Preview a reproducible random sample of the crime data.
df_crime.sample(
    n=SAMPLE_SIZE,
    random_state=SEED,
)

## Camden Population Projections
Key columns to take further are:
- Ward Name
- 2015, ..., 2020

In [None]:
# Preview a reproducible random sample of the population projections.
df_pop.sample(
    n=SAMPLE_SIZE,
    random_state=SEED,
)

## Camden Ward Boundary
Key columns to take further are:
- name
- geometry

In [None]:
# Preview a reproducible random sample of the ward boundary data.
shp_ward.sample(
    n=SAMPLE_SIZE,
    random_state=SEED,
)

## Camden Street Lighting
Key columns to take further are:
- lamp_type
- ward_name
- wattage

> Note: This dataset is updated daily, so the street lighting data used here is a snapshot taken on the date the author collected it.

For further development, we would also like to include:
- street_nam
- longitude
- latitude
- geometry

In [None]:
# Preview a reproducible random sample of the street lighting data.
shp_light.sample(
    n=SAMPLE_SIZE,
    random_state=SEED,
)