import pandas as pd
import numpy as np
incidents = pd.read_csv('NYPD_Shooting_Incident_Data__Year_To_Date__20250930.csv', index_col='INCIDENT_KEY', parse_dates=True)
ih = pd.read_csv('NYPD_Shooting_Incident_Data__Historic__20250930.csv', index_col='INCIDENT_KEY', parse_dates=True)

incidents['OCCUR_DATE'] = pd.to_datetime(incidents['OCCUR_DATE'], format='%m/%d/%Y')
incidents.sort_values(by='OCCUR_DATE')
incidents[0:15]

ih['OCCUR_DATE'] = pd.to_datetime(ih['OCCUR_DATE'], format='%m/%d/%Y')
ih.sort_values(by='OCCUR_DATE')

incidents.sort_values(by=['OCCUR_DATE'])

daily = pd.concat([ih, incidents]).groupby(['OCCUR_DATE']).size()
daily = pd.DataFrame(daily)

daily.plot()

<AxesSubplot: xlabel='OCCUR_DATE'>

inflation = pd.read_csv('inflation.csv', index_col='observation_date', parse_dates=True)

inflation = inflation[ inflation.index >= '2006-01-01']
inflation.plot()

<AxesSubplot: xlabel='observation_date'>

inflation['inflation'] = inflation['T10YIE']
inflation

daily = daily.join(inflation[['inflation']])

daily[[0, 'inflation']].plot(alpha=0.5)

<AxesSubplot: xlabel='OCCUR_DATE'>

#copied from bike tutorial
weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
for i in range(7):
    daily[weekdays[i]] = (daily.index.dayofweek==i).astype(float)
daily['weekdayno'] = daily.index.weekday.astype(int)

daily['dayofyear'] = (daily.index.dayofyear).astype(float)

#copied from bike tutorial
from pandas.tseries.holiday import USFederalHolidayCalendar
cal = USFederalHolidayCalendar()
holidays = cal.holidays('2006', '2025')
daily = daily.join(pd.Series(1, index=holidays, name='holiday'))
daily['holiday'].fillna(0, inplace=True)

daily[[0,'inflation', 'holiday']].plot(alpha=0.7)

<AxesSubplot: xlabel='OCCUR_DATE'>

from sklearn.linear_model import LinearRegression
dailyna = daily.fillna(method='ffill', inplace=False)
dailyna = daily.fillna(method='bfill', inplace=False)
cols = ['holiday', 'inflation', 'dayofyear'] + weekdays

X = dailyna[cols]
y = dailyna[0]
model = LinearRegression(fit_intercept=True)
model.fit(X,y)
dailyna['predicted'] = model.predict(X)
dailyna

dailyna[[0, 'predicted']].plot(alpha=0.5)

<AxesSubplot: xlabel='OCCUR_DATE'>

daily[0].plot()

<AxesSubplot: xlabel='OCCUR_DATE'>

params = pd.Series(model.coef_, index=X.columns)
params

holiday      1.965967
inflation    0.380162
dayofyear    0.003397
Mon         -0.149900
Tue         -0.699721
Wed         -0.867265
Thu         -0.823951
Fri         -0.328783
Sat          1.278995
Sun          1.590625
dtype: float64

dailyna

from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
poly_model = make_pipeline(PolynomialFeatures(7),
                           LinearRegression())

poly_model.fit(dailyna[['dayofyear']],y)

Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=7)),
                ('linearregression', LinearRegression())])

Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=7)),
                ('linearregression', LinearRegression())])

PolynomialFeatures(degree=7)

LinearRegression()

dailyna['danger_yq'] = poly_model.predict(dailyna[['dayofyear']])

dailyna[[0,'danger_yq']].plot(alpha=0.5)

<AxesSubplot: xlabel='OCCUR_DATE'>

poly_model.fit(dailyna[['weekdayno']],y)
dailyna['danger_wq'] = poly_model.predict(dailyna[['weekdayno']])
plotme = dailyna[dailyna.index > '2024-01-01']
plotme[[0,'danger_wq', 'danger_yq']].plot(alpha=0.5)

<AxesSubplot: xlabel='OCCUR_DATE'>

dailyna[[0,'danger_wq']].plot(alpha=0.5)

<AxesSubplot: xlabel='OCCUR_DATE'>

from sklearn.model_selection import train_test_split
cols = ['danger_wq', 'danger_yq']

X = dailyna[cols]
y = dailyna[0]
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.5)
tmodel = make_pipeline(PolynomialFeatures(7),
                           LinearRegression())
#tmodel = LinearRegression(fit_intercept=True)
tmodel.fit(Xtr,ytr)
plotme = pd.DataFrame()
plotme = Xte
plotme['predicted'] = tmodel.predict(Xte)
plotme['actual'] = yte
plotme = plotme[plotme.index > '2012-01-01']
plotme = plotme[plotme.index < '2020-06-01']
plotme[['actual', 'predicted']].plot(alpha=0.5)

<AxesSubplot: xlabel='OCCUR_DATE'>

plotme.plot(kind='scatter', x='actual', y='predicted', alpha=0.1)

<AxesSubplot: xlabel='actual', ylabel='predicted'>

import matplotlib.pyplot as plt

heatmap_data, xedges, yedges = np.histogram2d(plotme['predicted'], plotme['actual'], bins=20)
plt.figure(figsize=(8,12))
plt.imshow(heatmap_data.T,         # Transpose for correct orientation
           extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
           origin='lower',          # Setting origin to lower so y-axis is correct
           cmap='hot')              # Color map (you can change this)
          # aspect='auto')           # Improve the aspect ratio
plt.show()

_ = pd.concat([ih,incidents])
#_ = _.dropna(subset=['Longitude', 'Latitude'])
incident_locs = pd.DataFrame()
incident_locs['x'] = _['Longitude']
incident_locs['y'] = _['Latitude']
incident_locs['date'] = _['OCCUR_DATE']
incident_locs['dayofyear'] = incident_locs['date'].dt.dayofyear
incident_locs['weekday'] = incident_locs['date'].dt.weekday
incident_locs['time'] = pd.to_timedelta(_['OCCUR_TIME'])
incident_locs['seconds'] = incident_locs['time'].dt.total_seconds()
incident_locs = incident_locs[(incident_locs[['x','y']] != 0).all(axis=1)]

incident_locs.plot(kind='scatter',x='x', y='y', c='date', colorbar='True')#, x='Longitude', y="Latitude")

<AxesSubplot: xlabel='x', ylabel='y'>

incident_locs.index

Int64Index([231974218, 177934247, 255028563,  25384540,  72616285,  85875439,
             79780323,  85744504, 142324890, 152868707,
            ...
            299313254, 300295604, 306153155, 305163494, 305709030, 308455132,
            301256131, 300412113, 305034045, 306501157],
           dtype='int64', name='INCIDENT_KEY', length=30164)

_.index

Int64Index([231974218, 177934247, 255028563,  25384540,  72616285,  85875439,
             79780323,  85744504, 142324890, 152868707,
            ...
            299313254, 300295604, 306153155, 305163494, 305709030, 308455132,
            301256131, 300412113, 305034045, 306501157],
           dtype='int64', name='INCIDENT_KEY', length=30183)

poly_model.fit(dailyna[['dayofyear']],y)

Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=7)),
                ('linearregression', LinearRegression())])

Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=7)),
                ('linearregression', LinearRegression())])

PolynomialFeatures(degree=7)

LinearRegression()

incident_locs['dayofyear'] = incident_locs['date'].dt.dayofyear.astype(float)
incident_locs['week'] = incident_locs['date'].dt.isocalendar().week.astype(float)

incident_locs['yq'] = poly_model.predict(incident_locs[['dayofyear']])

incident_locs.plot(kind='scatter',x='x', y='y', c='y', colorbar='True')#, x='Longitude', y="Latitude")

<AxesSubplot: xlabel='x', ylabel='y'>

incident_locs.plot(kind='scatter',x='x', y='y', c='dayofyear', alpha=0.3, colorbar='True', figsize=(30,30))

<AxesSubplot: xlabel='x', ylabel='y'>

incident_locs.plot(kind='scatter',x='x', y='y', c='seconds', alpha=0.5, colorbar='True')

<AxesSubplot: xlabel='x', ylabel='y'>

incident_locs['hour'] = incident_locs['time'].dt.seconds // 3600
incident_locs['qhour'] = incident_locs['time'].dt.seconds // 900

incident_locs.plot(kind='scatter', x='dayofyear', y='seconds', alpha = 1)

<AxesSubplot: xlabel='dayofyear', ylabel='seconds'>

heatmap_data, xedges, yedges = np.histogram2d(incident_locs['dayofyear'], incident_locs['seconds'], bins=50)
plt.figure(figsize=(4,6))
plt.imshow(heatmap_data.T,         # Transpose for correct orientation
           extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
           origin='lower',          # Setting origin to lower so y-axis is correct
           cmap='hot',              # Color map (you can change this)
           aspect='auto')           # Improve the aspect ratio
plt.show()

from sklearn.mixture import GaussianMixture

# Prepare data for GMM
X = incident_locs[['dayofyear', 'seconds']]

# Fit GMM
gmm = GaussianMixture(n_components=4)  # Adjust n_components for granularity
gmm.fit(X.values)

from scipy.interpolate import griddata
from mpl_toolkits.mplot3d import Axes3D

t_min, t_max = X['seconds'].min(), X['seconds'].max()
d_min, d_max = X['dayofyear'].min(), X['dayofyear'].max()

y = np.linspace(t_min, t_max, 100)
x = np.linspace(d_min, d_max, 100)
X, Y = np.meshgrid(x, y)
XX = np.array([X.ravel(), Y.ravel()]).T
Z_log_prob = gmm.score_samples(XX)
Z = np.exp(Z_log_prob)
Z = Z.reshape(X.shape)

plt.figure(figsize=(8, 6))
# Use contourf for a filled heatmap
CF = plt.contourf(X, Y, Z, levels=50, cmap='viridis')
plt.colorbar(CF, label='Probability Density Function (PDF) value')
plt.scatter(incident_locs['dayofyear'], incident_locs['seconds'], c='red', s=10, alpha=0.1, label='Actual Crime Time and Day')
# Overlay the original burrow locations as a scatter plot
plt.title("Crime Density Map with Time and Day")
plt.xlabel("Day of Year")
plt.ylabel("Second of Day")
plt.legend()
plt.show()

incident_locs.plot(kind='scatter', x='weekday', y='seconds', c='dayofyear', alpha=0.01)

<AxesSubplot: xlabel='weekday', ylabel='seconds'>

incident_locs.hist(column='hour', bins=24)

array([[<AxesSubplot: title={'center': 'hour'}>]], dtype=object)

incident_locs.hist(column='weekday')

array([[<AxesSubplot: title={'center': 'weekday'}>]], dtype=object)

incident_locs['minute'] = incident_locs['time'].dt.seconds // 60

incident_locs.hist(column='minute', bins=100)

array([[<AxesSubplot: title={'center': 'minute'}>]], dtype=object)

oneday_model = make_pipeline(PolynomialFeatures(7),
                           LinearRegression())

oneday = pd.DataFrame(incident_locs.groupby(['qhour']).size())
oneday['qhour'] = oneday.index
y = oneday[0]

oneday[[0]].plot()

<AxesSubplot: xlabel='qhour'>

oneday_model.fit(oneday['qhour'].array.reshape(-1,1), y)

Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=7)),
                ('linearregression', LinearRegression())])

Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=7)),
                ('linearregression', LinearRegression())])

PolynomialFeatures(degree=7)

LinearRegression()

oneday['predicted'] = oneday_model.predict(oneday.index.array.reshape(-1,1))

oneday[[0,'predicted']].plot()

<AxesSubplot: xlabel='qhour'>

incident_locs.plot(kind='scatter', x='qhour', y='y', figsize=(15,15))

<AxesSubplot: xlabel='qhour', ylabel='y'>

incident_locs.plot(kind='scatter', x='x', y='qhour', figsize=(15,15))

<AxesSubplot: xlabel='x', ylabel='qhour'>

import matplotlib.pyplot as plt
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(projection='3d')
ax.scatter(incident_locs['x'], incident_locs['y'], incident_locs['qhour'], c=incident_locs['qhour'])

<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7fba475d00d0>

ilc = incident_locs.dropna(subset=['x', 'y'])
heatmap_data, xedges, yedges = np.histogram2d(ilc['x'], ilc['y'], bins=123)
plt.figure(figsize=(12, 12))
plt.imshow(heatmap_data.T,         # Transpose for correct orientation
           extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
           origin='lower',          # Setting origin to lower so y-axis is correct
           cmap='hot')              # Color map (you can change this)
          # aspect='auto')           # Improve the aspect ratio
plt.show()

from sklearn.mixture import GaussianMixture

# Prepare data for GMM
X = ilc[['x', 'y']]

# Fit GMM
gmm = GaussianMixture(n_components=33)  # Adjust n_components for granularity
gmm.fit(X.values)

GaussianMixture(n_components=33)

GaussianMixture(n_components=33)

from scipy.interpolate import griddata
from mpl_toolkits.mplot3d import Axes3D

lat_min, lat_max = ilc['y'].min(), ilc['y'].max()
lon_min, lon_max = ilc['x'].min(), ilc['x'].max()

lat_range = np.linspace(lat_min, lat_max, 100)
lon_range = np.linspace(lon_min, lon_max, 100)
lon_grid, lat_grid = np.meshgrid(lon_range, lat_range)
X = lon_grid
Y = lat_grid
XX = np.array([lon_grid.ravel(), lat_grid.ravel()]).T
Z_log_prob = gmm.score_samples(XX)
Z = np.exp(Z_log_prob)
Z = Z.reshape(lon_grid.shape)

plt.figure(figsize=(10, 8))
# Use contourf for a filled heatmap
CF = plt.contourf(X, Y, Z, levels=50, cmap='viridis')
plt.colorbar(CF, label='Probability Density Function (PDF) value')
plt.scatter(ilc['x'], ilc['y'], c='red', s=10, alpha=0.01, label='Actual Crime Locations')

# Overlay the original burrow locations as a scatter plot
plt.title("Crime Density Map using GMM")
plt.xlabel("X Coordinate")
plt.ylabel("Y Coordinate")
plt.legend()
plt.show()

from sklearn.mixture import GaussianMixture

# Prepare data for GMM
X = ilc[['x', 'y', 'week', 'hour']]

# Fit GMM
gmm = GaussianMixture(n_components=43)  # Adjust n_components for granularity
gmm.fit(X.values)

/usr/lib/python3/dist-packages/sklearn/mixture/_base.py:274: ConvergenceWarning: Initialization 1 did not converge. Try different init parameters, or increase max_iter, tol or check for degenerate data.
  warnings.warn(

GaussianMixture(n_components=43)

GaussianMixture(n_components=43)

from scipy.interpolate import griddata
from mpl_toolkits.mplot3d import Axes3D

TARGET_HOUR = 21
TARGET_WEEK = 22

lat_min, lat_max = ilc['y'].min(), ilc['y'].max()
lon_min, lon_max = ilc['x'].min(), ilc['x'].max()

lat_range = np.linspace(lat_min, lat_max, 100)
lon_range = np.linspace(lon_min, lon_max, 100)
lon_grid, lat_grid = np.meshgrid(lon_range, lat_range)
X = lon_grid
Y = lat_grid
XX_st = np.array([
    X.ravel(),
    Y.ravel(),
    np.full(X.size, TARGET_HOUR),
    np.full(X.size, TARGET_WEEK)
]).T
Z_log_prob = gmm.score_samples(XX_st)
Z = np.exp(Z_log_prob)
Z = Z.reshape(lon_grid.shape)

plt.figure(figsize=(10, 8))
# Use contourf for a filled heatmap
CF = plt.contourf(X, Y, Z, levels=50, cmap='viridis')
plt.colorbar(CF, label='Probability Density Function (PDF) value')
ilc2 = ilc[ilc['week'] == TARGET_WEEK]
ilc2 = ilc2[ilc2['hour'] == TARGET_HOUR]
plt.scatter(ilc2['x'], ilc2['y'], c='red', s=10, alpha=1, label='Actual Crime Locations')

# Overlay the original crime locations as a scatter plot
plt.title("Crime Density Map using GMM IN 4 DIMENSIONS")
plt.xlabel("X Coordinate")
plt.ylabel("Y Coordinate")
plt.legend()
plt.show()

	OCCUR_DATE	OCCUR_TIME	BORO	LOC_OF_OCCUR_DESC	PRECINCT	JURISDICTION_CODE	LOC_CLASSFCTN_DESC	LOCATION_DESC	STATISTICAL_MURDER_FLAG	PERP_AGE_GROUP	PERP_SEX	PERP_RACE	VIC_AGE_GROUP	VIC_SEX	VIC_RACE	X_COORD_CD	Y_COORD_CD	Latitude	Longitude	New Georeferenced Column
INCIDENT_KEY
304617623	2025-04-11	21:55:00	BROOKLYN	OUTSIDE	60	0	STREET	(null)	Y	(null)	(null)	(null)	45-64	M	BLACK	996467	150881	40.580810	-73.956017	POINT (-73.956017 40.58081)
303861624	2025-03-29	13:21:00	BROOKLYN	OUTSIDE	81	2	HOUSING	MULTI DWELL - PUBLIC HOUS	N	<18	M	BLACK	18-24	M	BLACK	1000876	191778	40.693058	-73.940046	POINT (-73.940046 40.693058)
305163491	2025-04-22	20:42:00	BROOKLYN	OUTSIDE	94	2	HOUSING	MULTI DWELL - PUBLIC HOUS	N	(null)	(null)	(null)	18-24	M	BLACK HISPANIC	1001685	201032	40.718455	-73.937104	POINT (-73.937104 40.718455)
306837529	2025-05-21	17:50:00	BRONX	OUTSIDE	52	0	STREET	(null)	Y	<18	M	WHITE HISPANIC	18-24	M	WHITE HISPANIC	1018634	259808	40.879731	-73.875662	POINT (-73.875662 40.879731)
307581458	2025-06-04	22:40:00	MANHATTAN	OUTSIDE	14	0	STREET	(null)	Y	(null)	(null)	(null)	25-44	M	WHITE HISPANIC	986684	215375	40.757840	-73.991213	POINT (-73.991213 40.75784)
299648921	2025-01-17	09:54:00	BRONX	OUTSIDE	40	2	HOUSING	MULTI DWELL - PUBLIC HOUS	N	18-24	M	BLACK	18-24	M	BLACK	1004934	235728	40.813680	-73.925276	POINT (-73.925276 40.81368)
306062049	2025-05-08	00:25:00	BRONX	OUTSIDE	47	2	HOUSING	MULTI DWELL - PUBLIC HOUS	N	(null)	(null)	(null)	25-44	M	WHITE HISPANIC	1030360	261043	40.883065	-73.833249	POINT (-73.833249 40.883065)
307442124	2025-06-02	21:55:00	BROOKLYN	INSIDE	83	0	DWELLING	MULTI DWELL - APT BUILD	Y	(null)	(null)	(null)	25-44	M	BLACK	1007689	191193	40.691437	-73.915478	POINT (-73.915478 40.691437)
304263629	2025-04-05	17:45:00	BRONX	OUTSIDE	46	0	STREET	(null)	N	25-44	M	WHITE HISPANIC	18-24	M	BLACK HISPANIC	1009234	247586	40.846216	-73.909699	POINT (-73.909699 40.846216)
305637544	2025-04-30	19:45:00	BRONX	OUTSIDE	48	0	STREET	(null)	N	(null)	(null)	(null)	25-44	M	BLACK	1017326	245083	40.839319	-73.880464	POINT (-73.880464 40.839319)
300827432	2025-02-08	01:58:00	BRONX	OUTSIDE	49	0	DWELLING	MULTI DWELL - APT BUILD	Y	45-64	M	BLACK	45-64	M	BLACK	1023484	247656	40.846355	-73.858191	POINT (-73.858191 40.846355)
302317359	2025-03-09	14:00:00	QUEENS	OUTSIDE	110	0	STREET	(null)	N	18-24	M	WHITE HISPANIC	18-24	M	BLACK	1022047	207921	40.737301	-73.863612	POINT (-73.863612 40.737301)
303275049	2025-03-20	00:59:00	MANHATTAN	OUTSIDE	34	0	STREET	(null)	N	25-44	M	WHITE HISPANIC	25-44	M	BLACK HISPANIC	1006609	255632	40.868306	-73.919159	POINT (-73.919159 40.868306)
300364619	2025-01-31	22:47:00	BROOKLYN	OUTSIDE	62	0	STREET	(null)	N	25-44	M	WHITE HISPANIC	25-44	M	WHITE HISPANIC	988574	161577	40.610175	-73.984427	POINT (-73.984427 40.610175)
305034043	2025-04-20	01:57:00	BROOKLYN	OUTSIDE	81	2	HOUSING	MULTI DWELL - PUBLIC HOUS	N	(null)	(null)	(null)	25-44	F	BLACK	1003207	190085	40.688406	-73.931642	POINT (-73.931642 40.688406)

	OCCUR_DATE	OCCUR_TIME	BORO	LOC_OF_OCCUR_DESC	PRECINCT	JURISDICTION_CODE	LOC_CLASSFCTN_DESC	LOCATION_DESC	STATISTICAL_MURDER_FLAG	PERP_AGE_GROUP	PERP_SEX	PERP_RACE	VIC_AGE_GROUP	VIC_SEX	VIC_RACE	X_COORD_CD	Y_COORD_CD	Latitude	Longitude	Lon_Lat
INCIDENT_KEY
9953252	2006-01-01	02:22:00	MANHATTAN	NaN	28	0.0	NaN	NONE	True	25-44	M	BLACK	25-44	M	BLACK	998815.750000000000000	233545.437500000000000	40.807700	-73.947386	POINT (-73.94738575999997 40.80770036400003)
9953248	2006-01-01	19:00:00	QUEENS	NaN	106	0.0	NaN	NONE	False	18-24	M	BLACK	18-24	M	BLACK	1028604.625000000000000	187929.500000000000000	40.682397	-73.840081	POINT (-73.84008072399996 40.68239691900004)
139716503	2006-01-01	12:30:00	BROOKLYN	NaN	77	0.0	NaN	PVT HOUSE	True	NaN	NaN	NaN	25-44	M	BLACK	996441.562500000000000	184160.359375000000000	40.672154	-73.956052	POINT (-73.95605150499995 40.67215420900004)
9953246	2006-01-01	05:51:00	BRONX	NaN	44	0.0	NaN	NONE	False	25-44	M	WHITE HISPANIC	18-24	M	WHITE HISPANIC	1007418.000000000000000	243859.218750000000000	40.835990	-73.916276	POINT (-73.91627635899994 40.83599040100006)
9953245	2006-01-01	02:00:00	BRONX	NaN	48	0.0	NaN	NONE	False	18-24	M	BLACK	<18	M	BLACK	1013404.562500000000000	251800.750000000000000	40.857770	-73.894607	POINT (-73.89460745999997 40.85776982200008)
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
298672096	2024-12-30	16:45:00	BRONX	OUTSIDE	47	0.0	STREET	(null)	False	(null)	(null)	(null)	<18	F	WHITE HISPANIC	1,021,316	259,277	40.878261	-73.865964	POINT (-73.865964 40.878261)
298672097	2024-12-30	18:48:00	BROOKLYN	OUTSIDE	60	2.0	HOUSING	MULTI DWELL - PUBLIC HOUS	False	25-44	M	BLACK	45-64	M	BLACK	989,372	155,205	40.592685	-73.981557	POINT (-73.981557 40.592685)
298672095	2024-12-30	20:32:00	BRONX	INSIDE	41	0.0	DWELLING	MULTI DWELL - APT BUILD	True	18-24	M	BLACK	25-44	M	BLACK	1,012,201	240,878	40.827795	-73.899003	POINT (-73.899003 40.827795)
298699604	2024-12-31	19:16:00	BROOKLYN	OUTSIDE	69	0.0	STREET	(null)	False	25-44	M	BLACK	18-24	M	BLACK	1,015,120	173,870	40.643866	-73.888761	POINT (-73.888761 40.643866)
298699604	2024-12-31	19:16:00	BROOKLYN	OUTSIDE	69	0.0	STREET	(null)	False	25-44	M	BLACK	25-44	M	BLACK	1,015,120	173,870	40.643866	-73.888761	POINT (-73.888761 40.643866)

	OCCUR_DATE	OCCUR_TIME	BORO	LOC_OF_OCCUR_DESC	PRECINCT	JURISDICTION_CODE	LOC_CLASSFCTN_DESC	LOCATION_DESC	STATISTICAL_MURDER_FLAG	PERP_AGE_GROUP	PERP_SEX	PERP_RACE	VIC_AGE_GROUP	VIC_SEX	VIC_RACE	X_COORD_CD	Y_COORD_CD	Latitude	Longitude	New Georeferenced Column
INCIDENT_KEY
298756516	2025-01-01	21:22:00	BRONX	OUTSIDE	40	2	HOUSING	MULTI DWELL - PUBLIC HOUS	N	(null)	(null)	(null)	<18	M	WHITE HISPANIC	1005407	235915	40.814191	-73.923568	POINT (-73.923568 40.814191)
298756517	2025-01-01	05:15:00	BRONX	OUTSIDE	44	0	STREET	(null)	N	45-64	M	BLACK	18-24	M	BLACK	1009255	244278	40.837127	-73.909635	POINT (-73.90963478 40.83712653)
298756517	2025-01-01	05:15:00	BRONX	OUTSIDE	44	0	STREET	(null)	N	45-64	M	BLACK	45-64	M	BLACK	1009255	244278	40.837127	-73.909635	POINT (-73.90963478 40.83712653)
298756515	2025-01-01	13:00:00	BRONX	INSIDE	52	0	DWELLING	MULTI DWELL - APT BUILD	N	45-64	M	WHITE HISPANIC	18-24	M	WHITE HISPANIC	1013194	254357	40.864789	-73.895355	POINT (-73.895355 40.864789)
298756517	2025-01-01	05:15:00	BRONX	OUTSIDE	44	0	STREET	(null)	Y	45-64	M	BLACK	18-24	M	BLACK	1009255	244278	40.837127	-73.909635	POINT (-73.90963478 40.83712653)
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
308800539	2025-06-29	02:58:00	BROOKLYN	OUTSIDE	73	0	STREET	(null)	Y	(null)	(null)	(null)	25-44	M	BLACK	1010272	180242	40.661370	-73.906206	POINT (-73.906206 40.66137)
308833787	2025-06-29	17:45:00	BRONX	OUTSIDE	43	0	STREET	(null)	N	(null)	(null)	(null)	25-44	M	BLACK	1022594	243360	40.834568	-73.861434	POINT (-73.861434 40.834568)
308800538	2025-06-29	02:43:00	BRONX	OUTSIDE	43	0	STREET	(null)	N	(null)	(null)	(null)	18-24	M	WHITE HISPANIC	1019527	241348	40.829049	-73.872528	POINT (-73.87252839 40.82904948)
308868057	2025-06-30	10:00:00	BRONX	OUTSIDE	50	0	STREET	MULTI DWELL - APT BUILD	N	(null)	(null)	(null)	45-64	M	WHITE HISPANIC	1007640	259512	40.878952	-73.915416	POINT (-73.915416 40.878952)
308868058	2025-06-30	13:00:00	BROOKLYN	OUTSIDE	73	0	STREET	(null)	N	(null)	(null)	(null)	25-44	M	BLACK	1007919	179199	40.658505	-73.914692	POINT (-73.91469197 40.65850488)

	T10YIE	inflation
observation_date
2006-01-02	NaN	NaN
2006-01-03	2.34	2.34
2006-01-04	2.35	2.35
2006-01-05	2.32	2.32
2006-01-06	2.33	2.33
...	...	...
2025-09-30	2.36	2.36
2025-10-01	2.35	2.35
2025-10-02	2.34	2.34
2025-10-03	2.33	2.33
2025-10-06	2.34	2.34

NYPD Shooting Incident Notebook: Solving Crime with Space and Time¶

Preparing the datasets¶

Looking for patterns¶

Training a naive linear model¶

A more sensible approach.¶

Every Crime is Spatial¶

Different Times¶

Spacetime continuum¶

Heatmap¶

Gaussian Mixture¶

4 Dimensions¶