Import all needed libraries

In [349]:
# only install the folium library the first time - then comment out
#!conda install -c conda-forge folium=0.5.0 --yes
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
!pip install geopy
import random # library for random number generation
import datetime
import numpy as np # library for vectorized computation
import pandas as pd # library to process data as dataframes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.datasets.samples_generator import make_blobs
import folium
import folium.map
import json # library to handle JSON files
import requests # library to handle requests
import time
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


print('Libraries imported.')
Requirement already satisfied: geopy in /opt/conda/envs/Python36/lib/python3.6/site-packages (1.18.1)
Requirement already satisfied: geographiclib<2,>=1.49 in /opt/conda/envs/Python36/lib/python3.6/site-packages (from geopy) (1.49)
Libraries imported.

The following cells reads in the required data

In [350]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_c7288fd559544ee4805edfc80a32e3c0 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='eB-aiMeRHR9sUVErXYnFX5q1iaJ_Lz_7nu_6iYpLa5Lh',
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_c7288fd559544ee4805edfc80a32e3c0.get_object(Bucket='ibmdatascienceprofessionalcapston-donotdelete-pr-sipdcjsjlytlpp',Key='firearmdata.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )


firearms_df = pd.read_csv(body)
# drop all rows with na values
firearms_df = firearms_df.dropna()
#reset index
firearms_df.reset_index(drop=True)
firearms_df['RECOVERY_ZIPCODE'] = firearms_df['RECOVERY_ZIPCODE'].astype('int32')
print('firearms data loaded')
firearms_df.head()
firearms data loaded
Out[350]:
UCR_CATEGORY TYPE_OF_FIREARM RECOVERY_DATE RECOVERY_ZIPCODE PERSON_RECOVERED_FROM_RACE PERSON_RECOVERED_FROM_SEX PERSON_RECOVERED_FROM_AGE Address_Concat Longitude Latitude
0 Aggravated Assault Submachine Gun (machine Pistol) 2010-07-16T00:00:00Z 40204 White M 20.0 1000 Charles St Louisville KY 40204 -85.735605 38.229847
1 Wpns Law Violation Submachine Gun (machine Pistol) 2011-07-21T00:00:00Z 40202 White M 40.0 600 W Jefferson St Louisville KY 40202 -85.761323 38.253961
2 Aggravated Assault Submachine Gun (machine Pistol) 2016-12-09T00:00:00Z 40210 Black M 23.0 2200 Date St Louisville KY 40210 -85.789472 38.242925
3 Aggravated Assault Submachine Gun (machine Pistol) 2016-12-09T00:00:00Z 40210 Black M 23.0 2200 Date St Louisville KY 40210 -85.789472 38.242925
4 Aggravated Assault Submachine Gun (machine Pistol) 2016-12-09T00:00:00Z 40210 Black M 23.0 2200 Date St Louisville KY 40210 -85.789472 38.242925
In [351]:
body = client_c7288fd559544ee4805edfc80a32e3c0.get_object(Bucket='ibmdatascienceprofessionalcapston-donotdelete-pr-sipdcjsjlytlpp',Key='louisvillezips.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

louzipsdf = pd.read_csv(body)
louzipsdf['zip'] = louzipsdf['zip'].astype('int32')
print('louzips_df data loaded')
louzips_df data loaded
In [352]:
fmt = '%Y-%m-%dT%H:%M:%SZ'
w = firearms_df[['RECOVERY_DATE']].values.tolist()
dow = []
dt  = [] 
for i in range(len(w)):
    t    = w[i]
    dt.append(datetime.datetime.strptime(t[0],fmt).date())
    t = dt[i]
    dow.append(int(t.isoweekday())) # Sunday is day 0
    
crimedf = pd.DataFrame(data={'date':dt,'weekday':dow,'zip':firearms_df['RECOVERY_ZIPCODE']})
crimedf['weekday'] = crimedf['weekday'].astype('int32')
zipgrp = crimedf.groupby('zip')
zipcnt = zipgrp.count()
zipcnt.sort_values(by='zip',inplace=True)
zipcnt = zipcnt.reset_index()
zips   = zipcnt.join(louzipsdf,lsuffix='_l',rsuffix='_r')
#zips.drop(['date','zip_r'],axis=1,inplace=True)
zips = zips.dropna()
zips.reset_index(drop=True)
zips.rename(columns={'zip_l':'zip','weekday':'crime count','lat':'lat','lng':'lon'},inplace=True)
In [353]:
def getDay(n):
    d = 'X'
    if(n == 1):
        d = 'M:'
        
    if(n == 2):
        d = 'Tu:'
        
        
    if(n == 3):
        d = 'W:'
        
        
    if(n == 4):
        d = 'Th:'

        
    if(n == 5):
        d = 'F:'
        
        
    if(n == 6):
        d = 'Sa:'
        
        
    if(n == 7):
        d = 'Su:'
    
    return d
        
def getDowLabel(zc):
    lbl = ''
    g = zipgrp.get_group(zc)
    gcnt = g.groupby('weekday').count()
    gsum = float(gcnt['zip'].sum())
    ind  = gcnt.index
    j = 0 
    for y in ind:
        pcnt = format(round(100*(gcnt['zip'].iloc[j]/gsum)),'.0f')
        d = getDay(y)
        c = d+str(pcnt)+' '
        lbl = lbl+c
        j = j+1
    
    return lbl

def getVenueLabel(name,rating,cat):
    lbl = 'Venue:'+name+' <br> '+'Rating:'+str(rating)+ '<br> '+'Category:'+cat
    lbl = lbl+'<br> --------------------------- <br>'
    return lbl
In [354]:
address = 'Louisville, KY'
geolocator = Nominatim(user_agent="kentucky_explorer")
location = geolocator.geocode(address)
lati = location.latitude
longi = location.longitude
print('The geograpical coordinate of Louisville are {}, {}.'.format(longi, lati))
The geograpical coordinate of Louisville are -85.759407, 38.2542376.
In [355]:
CLIENT_ID = 'ZNMO4N1VVSBBVJDTQALK5TXZNMML2CYHLNMBUE0DKEM0HNAD' # your Foursquare ID
CLIENT_SECRET = 'VTRJTZOOYPBE0GXCTKRDDSNTQEG5YSFYZ0GBKSAJT4DAZMNB' # your Foursquare Secret
VERSION = '20180604'
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

def getLouisvilleVenues():
    radius = 13000 # 8 miles
    limit  = 50
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lati, 
            longi, 
            radius, 
            limit)
            
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']        
    # return only relevant information for each nearby venue
    return results 

def getVenue(v):
    venueid = v['venue']['id']
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
            venueid,
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
            
    # make the GET request
    resp = requests.get(url).json()['response']
    hasVenue = 'venue' in resp
    if hasVenue:
        result = resp['venue']
    else:
        result = None
    # return only relevant information for each nearby venue
    return result


def getVenuesDetail():
    venuesdf = pd.DataFrame(columns=['name','zipc','lat','lon','rating','color','cat','label'])
    res = getLouisvilleVenues()
    K = len(res)
    time.sleep(2.0)
    j = 1 
    for v in res:
        venue  = getVenue(v)
        time.sleep(1.0)
        if venue is None:
            print('--- empty venue ---')
            print('processed venue ... ',j,' ... out of ',K)
            j = j+1
            continue 
                
        time.sleep(0.5)
        hasName = 'name' in venue
        hasLoc  = 'location' in venue
        hasRate = 'rating' in venue
        hasColor = 'ratingColor' in venue
        hasCats  = 'categories' in venue
        if(hasName and hasLoc and hasRate and hasColor and hasCats):
            hasZip = 'postalCode' in venue['location']
            if hasZip:
                name   = venue['name']
                zipc   = venue['location']['postalCode']
                lat    = venue['location']['lat']
                lon    = venue['location']['lng']
                rating = venue['rating']
                color  = venue['ratingColor']
                cat   = venue['categories'][0]['shortName']
                venueLabel = getVenueLabel(name,rating,cat)
                venuesdf.loc[venuesdf.size] = [name,zipc,lat,lon,rating,color,cat,venueLabel]
                
        print('processed venue ... ',j,' ... out of ',K)
        j = j+1

    
            
    print('returning ',str(len(venuesdf.index)),' venues ...')
    return venuesdf
    
    
Your credentails:
CLIENT_ID: ZNMO4N1VVSBBVJDTQALK5TXZNMML2CYHLNMBUE0DKEM0HNAD
CLIENT_SECRET:VTRJTZOOYPBE0GXCTKRDDSNTQEG5YSFYZ0GBKSAJT4DAZMNB
In [356]:
#venuesDetail = getVenuesDetail()
# save for future use
#venuesDetail.to_csv('louisvillevenuedetail.csv',index=False)
venueDetail = pd.read_csv('louisvillevenuedetail.csv')
venueRandom = venueDetail.sample(10)
venueRandom
Out[356]:
name zipc lat lon rating color cat label
0 Jeff Ruby's Steakhouse 40202 38.256763 -85.756186 9.2 00B551 Steakhouse Venue:Jeff Ruby's Steakhouse<br>Rating:9.2<br>...
14 Aloft Louisville Downtown 40202 38.255766 -85.751760 8.9 73CF42 Hotel Venue:Aloft Louisville Downtown<br>Rating:8.9<...
4 Mussel & Burger Bar 40202 38.256702 -85.761528 8.9 73CF42 Burgers Venue:Mussel & Burger Bar<br>Rating:8.9<br>Cat...
32 Kentucky Science Center 40202 38.257753 -85.762545 8.3 73CF42 Science Museum Venue:Kentucky Science Center<br>Rating:8.3<br...
44 Rye 40206 38.252847 -85.735984 8.7 73CF42 Gastropub Venue:Rye<br>Rating:8.7<br>Category:Gastropub<...
41 Feast BBQ 40206 38.253070 -85.735440 8.8 73CF42 BBQ Venue:Feast BBQ<br>Rating:8.8<br>Category:BBQ<...
42 Sidebar at Whiskey Row 40202 38.256839 -85.753172 8.2 73CF42 Cocktail Venue:Sidebar at Whiskey Row<br>Rating:8.2<br>...
6 Proof on Main 40202 38.256981 -85.761560 8.8 73CF42 Bar Venue:Proof on Main<br>Rating:8.8<br>Category:...
48 Feeders Supply 40204 38.250779 -85.732174 8.8 73CF42 Pet Store Venue:Feeders Supply<br>Rating:8.8<br>Category...
46 Butchertown Grocery 40206 38.255908 -85.730765 8.9 73CF42 New American Venue:Butchertown Grocery<br>Rating:8.9<br>Cat...
In [362]:
# dictionary for zip code labels
zcLabels = {}
# determine scale factor for radius
N = float(len(crimedf['zip']))
sf = zips['crime count']/N
Mx = max(sf)
mx = min(sf)
R  = Mx-mx
slp =10.0/R
#setup the map
louisville = folium.Map(location=[lati, longi], zoom_start=11)
# set color scheme for the clusters
w = np.arange(len(zips['zip']))
ys = [i + w + (i*w)**2 for i in range(len(w))]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
K = len(rainbow)
i = 0 
# add markers to the map
for  zc,cnt,lat,lon in zip(zips['zip'], zips['crime count'], zips['lat'], zips['lon']):
    v        = cnt/N
    radval   = round(slp*(v-mx))
    pcrimes  = format(round(100*(cnt/N)), '.0f')
    dowlbl = getDowLabel(zc)
    ziplbl = ' Zip Code:'+str(zc)+'||% of crimes: '+str(pcrimes)+'||% by day: '+dowlbl
    zcLabels[zc] = ziplbl
    label = folium.Popup(ziplbl)
    folium.CircleMarker(
        [lat, lon],
        radius=10+radval,
        popup=label,
        color=rainbow[i % K],
        fill=True,
        fill_color=rainbow[i % K],
        fill_opacity=0.25).add_to(louisville)
    
    i = i+1 

for t in venueRandom.itertuples():
    name = t[1]
    zc   = int(t[2])
    la   = t[3]
    lg   = t[4]
    ra   = t[5]
    col  = t[6]
    cat  = t[7]
    lbl  = t[8]
    if zc in zcLabels:
        zlbl = zcLabels[zc]
        w = 'Venue:'+name+'||Rating:'+str(ra)+'||Category:'+cat+'||'+zlbl
        fLabel = folium.Popup(w,parse_html=True)
        mrker = folium.Marker(
                    location=[la,lg],
                    popup=fLabel)
        mrker.add_to(louisville)
    
# show the Louisville map
louisville
Out[362]:
In [ ]: