import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, Polygon, LineString, GeometryCollection, box
from shapely.ops import split
import math
import numpy as np
from scipy.optimize import linear_sum_assignment
import seaborn as sns
from matplotlib import pyplot as plt
from shapely.affinity import scale
from tqdm.auto import tqdm
pd.set_option('display.max_columns', None)
imd = gpd.read_file("input/IMD2018.zip")
df = imd.cx[1747618.6612:1768514.9720,5909569.6924:5922316.0159]
df = df[df.dhb2015_na == "Auckland"].copy()
df.set_index("DZ2018", inplace=True)
df.sample(1)
Census_Pop | Count_MB18 | dhb2015_co | dhb2015_na | ged2020num | ged2020nam | ta2020code | ta2020name | regc2020co | regc2020na | Rank_IMD18 | Dec_IMD18 | Rank_Emplo | Decile_Emp | Rank_Incom | Decile_Inc | Rank_Crime | Decile_Cri | Rank_Housi | Decile_Hou | Rank_Healt | Decile_Hea | Rank_Educa | Decile_Edu | Rank_Acces | Decile_Acc | RnkIMDNoEm | DecIMDNoEm | RnkIMDNoIn | DecIMDNoIn | RnkIMDNoCr | DecIMDNoCr | RnkIMDNoHo | DecIMDNoHo | RnkIMDNoHe | DecIMDNoHe | RnkIMDNoEd | DecIMDNoEd | RnkIMDNoAc | DecIMDNoAc | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
DZ2018 | |||||||||||||||||||||||||||||||||||||||||
7600547 | 1056 | 7 | 3 | Auckland | 25 | Mt Roskill | 76 | Auckland | 2 | Auckland Region | 2953 | 5 | 2988.5 | 5 | 3524 | 6 | 1286 | 3 | 5124 | 9 | 2107 | 4 | 1744 | 3 | 2171 | 4 | 2945 | 5 | 2640 | 5 | 3058 | 5 | 2510 | 5 | 3193 | 6 | 3158 | 6 | 3018 | 5 | POLYGON ((1753087.998 5913488.341, 1753074.246... |
df.plot("Rank_IMD18", legend=True, figsize=(10,10), edgecolor="black")
<AxesSubplot: >
# Pick the most central polygon
df_centroid = box(*df.total_bounds).centroid
start_index = df.distance(df_centroid).idxmin()
start = df.geometry[start_index]
start
df.centroid.distance(df.boundary).describe()
count 581.000000 mean 148.937563 std 69.089545 min 10.112110 25% 110.898948 50% 146.221736 75% 180.194046 max 679.568032 dtype: float64
radius = df.centroid.distance(df.boundary).mean()
radius
148.93756349772616
A hexagon is made of 6 equilateral triangles, with internal angles of 120 degrees. The internal angles of the equilateral triangles are all 60 degrees.
def create_hexagon(center, radius, rotate=False):
vertices = []
for i in range(6):
angle = math.radians(60 * i)
if rotate:
angle += math.radians(30)
x = center.x + radius * math.cos(angle)
y = center.y + radius * math.sin(angle)
vertices.append((x, y))
hexagon = Polygon(vertices)
return hexagon
start_hexagon = create_hexagon(start.centroid, radius)
assert start_hexagon.is_valid
start_hexagon
create_hexagon(start.centroid, radius, True)
Some options here for splitting up a hexagon to represent variables within. Here's the composite equilateral triangles:
def hexagon_to_triangles(hexagon):
center = hexagon.centroid
vertices = list(hexagon.exterior.coords)
triangles = []
for i in range(6):
next_i = (i + 1) % 6 # Wrap around to form a closed shape
triangle = Polygon([center, vertices[i], vertices[next_i]])
triangles.append(triangle)
return GeometryCollection(triangles)
triangles = hexagon_to_triangles(start_hexagon)
assert triangles.is_valid
triangles
Evenly sliced along the vertical axis (y)
def slice_hexagon(hexagon, slices = 7):
if hexagon is None:
return None
minx, miny, maxx, maxy = hexagon.bounds
poly = hexagon
segments = []
slice_height = (maxy-miny) / slices
for i in range(1, slices + 1):
y = miny + slice_height * i
parts = list(split(poly, LineString([(minx, y), (maxx, y)])).geoms)
parts.sort(key=lambda p: p.bounds[1], reverse=True)
#print(i, len(parts), [p.bounds[1] for p in parts], [p.area for p in parts])
if len(parts) == 2:
segments.append(parts[1])
poly = parts[0]
if i == slices:
segments.append(parts[0])
return GeometryCollection(segments)
slice_hexagon(start_hexagon)
Sub hexagons, with a nice star in the middle
def sub_hexagons_star(hexagon):
center = hexagon.centroid
hexagons = [create_hexagon(center, radius * .33)]
for i in range(6):
angle = math.radians(60 * i + 0)
x = center.x + radius * .66 * math.cos(angle)
y = center.y + radius * .66 * math.sin(angle)
hexagons.append(create_hexagon(Point([x, y]), radius * .33))
return GeometryCollection(hexagons)
GeometryCollection(list(sub_hexagons_star(start_hexagon).geoms) + [start_hexagon])
Better internal tesselation with rotated internal hexagons
def sub_hexagons(hexagon):
if hexagon is None:
return None
center = hexagon.centroid
radius = start_hexagon.length / 6
hexagons = [create_hexagon(center, radius * .33, True)]
for i in range(6):
angle = math.radians(60 * i + 0)
x = center.x + radius * .6 * math.cos(angle)
y = center.y + radius * .6 * math.sin(angle)
hexagons.append(create_hexagon(Point([x, y]), radius * .33, True))
return GeometryCollection(hexagons)
sub = sub_hexagons(start_hexagon)
GeometryCollection(list(sub.geoms) + [start_hexagon])
neighbours = df.geometry[df.touches(start)]
ax = neighbours.boundary.plot()
gpd.GeoSeries(start_hexagon).plot(ax=ax, edgecolor="red", facecolor="none")
<AxesSubplot: >
An equilateral triangle, split into two, makes two equal area triangles. The length of the line splitting the triangle, * 2, is the distance to the center of the neighbouring tesselated hexagon
triangle = triangles.geoms[0]
v1, v2, v3 = set(triangle.exterior.coords)
midpoint = ((v1[0] + v2[0]) / 2, (v1[1] + v2[1]) / 2)
# Create a line from the midpoint to the opposite vertex
split_line = LineString([midpoint, v3])
print(split_line.length)
split_triangles = split(triangle, scale(split_line, yfact=1.001))
split_triangles
128.98371356700807
radius, split_line.length * 2
(148.93756349772616, 257.96742713401613)
Create the neighbouring 6 tesselating hexagons
from shapely.geometry import Point
hex_neighbours = []
for i in range(6):
angle = math.radians(60 * i + 30)
x = start.centroid.x + split_line.length * 2 * math.cos(angle)
y = start.centroid.y + split_line.length * 2 * math.sin(angle)
hex_neighbours.append(create_hexagon(Point([x, y]), radius))
hex_neighbours = gpd.GeoSeries(hex_neighbours, crs=df.crs)
ax = hex_neighbours.plot(edgecolor="blue", facecolor="none", figsize=(10,10))
for i in range(6):
p = hex_neighbours.centroid.iloc[i]
ax.annotate(text=str(i), xy=[p.x, p.y + radius / 8], ha='center', color="blue")
neighbours.boundary.plot(ax=ax, color="green", alpha=.5)
for i in neighbours.index:
ax.annotate(text=str(i), xy=[neighbours.centroid[i].x, neighbours.centroid[i].y], ha='center', color="green")
gpd.GeoSeries(start_hexagon).plot(ax=ax, edgecolor="red", facecolor="none")
<AxesSubplot: >
# Build a cost distance matrix
distance_matrix = neighbours.centroid.apply(lambda n: n.distance(hex_neighbours.centroid))
print(distance_matrix)
ax = sns.heatmap(distance_matrix.round(), annot=True, fmt=".0f")
0 1 2 3 4 \ DZ2018 7601013 1037.278352 913.320417 658.989063 525.519917 719.696727 7601018 866.270091 674.339088 424.246421 448.249385 704.711413 7601022 894.379258 638.797863 515.889404 718.381927 952.850534 7601040 1083.400267 1109.612078 935.835365 680.333649 636.687560 7601046 722.536931 501.793548 595.691331 853.324329 999.212321 7601048 1148.808284 1318.769759 1279.153864 1055.425191 833.384142 7601086 396.314379 445.256259 703.120971 865.630991 841.503989 7601122 875.427342 1095.127849 1320.731186 1362.506789 1193.102165 7601143 1236.926378 1494.826013 1643.323584 1569.076814 1325.701362 5 DZ2018 7601013 958.045394 7601018 878.274647 7601022 1024.619428 7601040 872.251215 7601046 946.241970 7601048 892.999551 7601086 641.874599 7601122 937.265659 7601143 1136.431480
row_ind, col_ind = linear_sum_assignment(distance_matrix)
print(row_ind, col_ind, distance_matrix.index[row_ind], distance_matrix.values[row_ind, col_ind].sum())
mask = np.full_like(distance_matrix, np.nan)
mask[row_ind, col_ind] = distance_matrix.values[row_ind, col_ind]
sns.heatmap(mask.round(), annot=True, fmt=".0f")
[0 1 3 4 5 6] [3 2 4 1 5 0] Index([7601013, 7601018, 7601040, 7601046, 7601048, 7601086], dtype='int64', name='DZ2018') 3377.5613763247925
<AxesSubplot: >
# Create a new hex_geo column to store best matching hexagons for each DZ
df["hex_geo"] = gpd.GeoSeries()
df.loc[start_index, "hex_geo"] = start_hexagon
pbar = tqdm(total=sum(df.hex_geo.isna()))
queue = [start_index]
hexagons_to_use = []
def assign_neighbours(index):
# Given an index with a hexagon, this function assigns tesselating hexagons to it's neighbours
global hexagons_to_use
geometry = df.geometry[index]
geometry_hex = df.hex_geo[index]
if not geometry_hex:
return
neighbours = df.geometry[df.hex_geo.isna() & df.touches(geometry)]
if len(neighbours) == 0:
# No neighbours
#print("No neighbours")
return
hex_neighbours = []
for i in range(6):
angle = math.radians(60 * i + 30)
x = geometry_hex.centroid.x + split_line.length * 2 * math.cos(angle)
y = geometry_hex.centroid.y + split_line.length * 2 * math.sin(angle)
hex = create_hexagon(Point([x, y]), radius)
# Filter out already used hexagons
if df.hex_geo.centroid.distance(hex.centroid).min() > 1:
hex_neighbours.append(hex)
for hex in hexagons_to_use:
if (df.hex_geo.centroid.distance(hex.centroid).min() > 1) and (gpd.GeoSeries(hex_neighbours, crs=df.crs).distance(hex).min() > 1):
hex_neighbours.append(hex)
if len(hex_neighbours) == 0:
# No available hexagons
#print(f"No available hexagons for {index}")
return
hex_neighbours = gpd.GeoSeries(hex_neighbours, crs=df.crs)
# Build a cost distance matrix
distance_matrix = neighbours.centroid.apply(lambda n: n.distance(hex_neighbours.centroid))
if len(hexagons_to_use):
# Discount for using leftovers
for hex in hexagons_to_use:
#continue
minidx = hex_neighbours.centroid.distance(hex).idxmin()
dist = hex_neighbours.centroid[minidx].distance(hex)
if dist < 1:
distance_matrix[minidx] *= .98
hexagons_to_use = []
row_ind, col_ind = linear_sum_assignment(distance_matrix)
if len(col_ind) < len(hex_neighbours):
# Store unused hexagons, such that they might be used in the next call
hexagons_to_use = hex_neighbours[~hex_neighbours.index.isin(col_ind)].tolist()
# Assign the optimal hexagons to each neighbour
df.loc[distance_matrix.index[row_ind], "hex_geo"] = hex_neighbours[col_ind].values
pbar.update(len(col_ind))
queue.extend(distance_matrix.index[row_ind])
while queue:
# FIFO
assign_neighbours(queue.pop(0))
pbar.close()
print(f"Assigned hexagons for {sum(~df.hex_geo.isna())} out of {len(df)} DZs")
gpd.GeoSeries(df.hex_geo, crs=df.crs).explore()
0%| | 0/580 [00:00<?, ?it/s]
Assigned hexagons for 581 out of 581 DZs
rank_cols = ['Rank_Emplo', 'Rank_Incom', 'Rank_Crime', 'Rank_Housi', 'Rank_Healt', 'Rank_Educa', 'Rank_Acces']
df["sliced_hexagons"] = df.hex_geo.apply(slice_hexagon)
df["sub_hexagons"] = df.hex_geo.apply(sub_hexagons)
df["sub_hexagons_star"] = df.hex_geo.apply(sub_hexagons_star)
fig, ax = plt.subplots(figsize=(10,10))
for i, rank_col in enumerate(rank_cols):
df.set_geometry(df.sliced_hexagons.apply(lambda g: g.geoms[i] if g else None)).plot(rank_col, ax=ax, cmap="BuPu", edgecolor="black")
fig, ax = plt.subplots(figsize=(10,10))
df.set_geometry(df.hex_geo).plot("Rank_IMD18", ax=ax, cmap="BuPu", edgecolor="black", linewidth=.5)
for i, rank_col in enumerate(rank_cols):
df.set_geometry(df.sub_hexagons.apply(lambda g: g.geoms[i] if g else None)).plot(rank_col, ax=ax, cmap="BuPu", edgecolor="black", linewidth=.5)
fig, ax = plt.subplots(figsize=(10,10))
df.set_geometry(df.hex_geo).plot("Rank_IMD18", ax=ax, cmap="BuPu", edgecolor="black", linewidth=.5)
for i, rank_col in enumerate(rank_cols):
df.set_geometry(df.sub_hexagons_star.apply(lambda g: g.geoms[i] if g else None)).plot(rank_col, ax=ax, cmap="BuPu", edgecolor="black", linewidth=.5)
df.set_geometry(df.hex_geo).explore()
# Folium map
m = df.set_geometry(df.hex_geo).explore("Rank_IMD18", cmap="BuPu")
for i, rank_col in enumerate(rank_cols):
df.set_geometry(df.sub_hexagons.apply(lambda g: g.geoms[i] if g else None)).explore(rank_col, m=m, cmap="BuPu", legend=False)
m.save("hexagons_map.html")
m