Files
SIMoN/graphs/build.py
Michael T. Kelbaugh 566dd85a0c black . -l 79
Former-commit-id: 8f0b33b3653c1866411d30cdaab7b41d8cae2a16
2020-04-14 10:25:15 -04:00

408 lines
13 KiB
Python

# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Distributed under the terms of the MIT License.
from datetime import datetime
start = datetime.now()
print(start)
from geopandas import read_file, sjoin
import networkx as nx
from networkx.readwrite import json_graph
import geojson
import itertools
from collections import defaultdict
import uuid
import os
import json
with open(os.path.join(os.path.dirname(__file__), "config.json")) as f:
config = json.load(f)
# where to save the graphs
save_dir = os.path.join(os.path.dirname(__file__), "out")
# where the shapefiles are stored
shapefile_dir = os.path.join(os.path.dirname(__file__), "shapefiles")
# the coordinate reference system the shapefiles are defined on
projection = config["projection"]
# scale the areas calculated for shapefile geometries from square kilometers to square meters
scale_factor = config["scale_factor"]
# the minimum area of a meet node (a node formed by the interestion of two disparate geograophic granularities), in square kilometers
minimum_intersection_area = config["minimum_intersection_area"]
# Define the abstract graph with a list of tuples with the form (source, destination), where source is a higher lower resolution granularity that encompasses destination, a higher resolution granularity.
abstract_edges = config["abstract_edges"]
# Save the instance graph with its geometries included. This will create a very large graph.
save_shapes = config["save_shapes"]
# open the shapefiles for each granularity
states = read_file(f"{shapefile_dir}/state.shp").to_crs(epsg=projection)
counties = read_file(f"{shapefile_dir}/county.shp").to_crs(epsg=projection)
nercs = read_file(f"{shapefile_dir}/nerc.shp").to_crs(epsg=projection)
huc8s = read_file(f"{shapefile_dir}/huc8.shp").to_crs(epsg=projection)
latlons = read_file(f"{shapefile_dir}/latlon.shp").to_crs(epsg=projection)
# nerc regions
nercs["country"] = nercs.apply(lambda x: 1, axis=1)
nercs["area"] = nercs.apply(lambda row: row["geometry"].area, axis=1)
# nerc shapefile has the smallest scope, so it is used to define the scope of the USA
country = nercs.dissolve(by="country", aggfunc={"area": "sum"})
country["NAME"] = "usa48"
# counties
counties["county_polygons"] = counties.geometry.copy()
counties.geometry = counties.geometry.representative_point()
counties = sjoin(counties, states, how="inner", op="within")
counties.geometry = counties["county_polygons"]
counties.drop("index_right", axis=1, inplace=True)
counties.rename(columns={"ID_left": "ID"}, inplace=True)
counties.rename(columns={"NAME_left": "NAME"}, inplace=True)
counties.rename(columns={"ID_right": "parent_ID"}, inplace=True)
# latitude-longitude grid squares
latlons["parent_ID"] = "usa48"
huc8s["parent_ID"] = "usa48"
nercs["parent_ID"] = "usa48"
states["parent_ID"] = "usa48"
# each shapefile should have 4 attributes: ID, NAME, parent_ID, geometry
shapefiles = {}
shapefiles["state"] = states
shapefiles["county"] = counties
shapefiles["nerc"] = nercs
shapefiles["huc8"] = huc8s
shapefiles["latlon"] = latlons
for granularity, shapefile in shapefiles.items():
for column in ["ID", "NAME", "geometry", "parent_ID"]:
if column not in shapefile.columns:
print(
f"WARNING: required column {column} not in shapefile {granularity}"
)
# display the graph
def draw_graph(graph, display=False):
if display:
nx.draw(graph, labels={node: node for node in graph.nodes()})
print("{} nodes:".format(len(graph.nodes())))
print("{} edges:".format(len(graph.edges())))
# construct a graph from a list of edges
def build_graph(original_edges, is_abstract=False):
graph = nx.DiGraph()
graph.is_abstract = is_abstract
for edge in original_edges:
if is_abstract:
graph.add_edge(*edge)
else:
graph.add_edge(*edge)
return graph
# returns the name of a wedge node, based on its parents
def meet(a, b):
sort = sorted((a, b))
return "{}^{}".format(sort[0], sort[1])
# add wedge nodes to an abstact graph
def add_abstract_wedges(abstract_graph, original_nodes):
for combo in list(itertools.combinations(original_nodes, 2)):
if not (
nx.has_path(abstract_graph, combo[0], combo[1])
or nx.has_path(abstract_graph, combo[1], combo[0])
):
new_node = meet(combo[0], combo[1])
abstract_graph.add_edge(combo[0], new_node)
abstract_graph.add_edge(combo[1], new_node)
# add wedge nodes to an instance graph
def add_instance_wedges(graph, combos, instance_graph_types):
for combo in combos:
check_intersection = graph.nodes[combo[0]]["shape"].intersects(
graph.nodes[combo[1]]["shape"]
) and not graph.nodes[combo[0]]["shape"].touches(
graph.nodes[combo[1]]["shape"]
)
if not check_intersection:
continue
try:
shape = graph.nodes[combo[1]]["shape"].intersection(
graph.nodes[combo[0]]["shape"]
)
area = shape.area / scale_factor
new_node = meet(combo[0], combo[1])
if area >= minimum_intersection_area:
graph.add_edge(combo[0], new_node)
graph.add_edge(combo[1], new_node)
instance_graph_types[
meet(
graph.nodes[combo[0]]["type"],
graph.nodes[combo[1]]["type"],
)
].append(new_node)
graph.nodes[new_node]["shape"] = shape
graph.nodes[new_node]["area"] = area
graph.nodes[new_node]["type"] = meet(
graph.nodes[combo[0]]["type"],
graph.nodes[combo[1]]["type"],
)
else:
pass
# print(f"{new_node} is too small to be added. area = {area}")
except Exception as e:
print(
"ERROR: could not calculate intersection of {} with {}: {}".format(
combo[0], combo[1], e
)
)
if not graph.nodes[combo[0]]["shape"].is_valid:
print(
f"WARNING: {combo[0]} has invalid geometry, area = {graph.nodes[combo[0]]['shape'].area/scale_factor}"
)
# graph.remove_node(combo[0])
# print(f"removed {combo[0]} from graph")
if not graph.nodes[combo[1]]["shape"].is_valid:
print(
f"WARNING: {combo[1]} has invalid geometry, area = {graph.nodes[combo[1]]['shape'].area/scale_factor}"
)
# graph.remove_node(combo[1])
# print(f"removed {combo[1]} from graph")
return instance_graph_types
# construct an instance graph
def build_instance_graph(root):
instance_graph_types = defaultdict(list)
instance_graph = build_graph([])
instance_graph.add_node(root, name=root, type=root, shape=None, area=None)
abstract_nodes_bfs = [root] + [
v for u, v in nx.bfs_edges(abstract_graph, root)
]
for node in abstract_nodes_bfs:
for child in abstract_graph.successors(node):
shapefile = shapefiles[child]
for index, row in shapefile.iterrows():
ID = str(row["ID"])
shape = row["geometry"]
area = row["geometry"].area / scale_factor
name = row["NAME"]
if not shape.is_valid:
print(
f"WARNING: instance node {name} has invalid geometry. area = {area}"
)
if area < minimum_intersection_area:
print(
f"WARNING: instance node {name} is smaller than minimum intersection area. area = {area}"
)
instance_graph.add_node(
ID, name=name, type=child, shape=shape, area=area
)
instance_graph_types[child].append(ID)
instance_graph.add_edge(str(row["parent_ID"]), ID)
instance_graph.add_node(
root,
name=root,
type=root,
shape=None,
area=float(country.area) / scale_factor,
)
return instance_graph, instance_graph_types
# build the abstract graph
abstract_graph = build_graph(abstract_edges, is_abstract=True)
root = list(nx.topological_sort(abstract_graph))[0]
abstract_nodes = [root] + [v for u, v in nx.bfs_edges(abstract_graph, root)]
# build the instance graph
instance_graph, instance_graph_types = build_instance_graph(root)
# add wedges to the abstract graph
add_abstract_wedges(abstract_graph, abstract_nodes)
# iterate through the abstract graph wedges, in BFS order
abstract_graph_wedges = [
v for u, v in nx.bfs_edges(abstract_graph, root) if v not in abstract_nodes
]
for wedge in abstract_graph_wedges:
l, r = wedge.split("^")
parents = [
parent
for parent in abstract_graph.predecessors(wedge)
if parent in abstract_graph_wedges
]
if parents:
parent = sorted(
parents, key=lambda node: len(instance_graph_types[node])
)[0]
ll, rr = parent.split("^")
combos = []
for instance in instance_graph_types[parent]:
instance_l, instance_r = instance.split("^")
if l == instance_graph.nodes[instance_l].get("type"):
for element in instance_graph.successors(instance_r):
if instance_graph.nodes[element].get("type") == r:
combos.append((instance_l, element))
elif r == instance_graph.nodes[instance_r].get("type"):
for element in instance_graph.successors(instance_l):
if instance_graph.nodes[element].get("type") == l:
combos.append((element, instance_r))
elif l == instance_graph.nodes[instance_r].get("type"):
for element in instance_graph.successors(instance_l):
if instance_graph.nodes[element].get("type") == r:
combos.append((element, instance_r))
elif r == instance_graph.nodes[instance_l].get("type"):
for element in instance_graph.successors(instance_r):
if instance_graph.nodes[element].get("type") == l:
combos.append((instance_l, element))
else:
print(f"ERROR: no match for instance {instance}")
else:
combos = list(
itertools.product(
*[instance_graph_types[l], instance_graph_types[r]]
)
)
instance_graph_types = add_instance_wedges(
instance_graph, combos, instance_graph_types
)
# remove nodes without neighbors
no_neighbors = set(
[
node[0]
for node in instance_graph.nodes(data=True)
if node[1]["type"] in abstract_nodes
and not list(instance_graph.neighbors(node[0]))
]
)
if no_neighbors:
print(f"removing non-wedge nodes without children: {no_neighbors}")
for node in no_neighbors:
print(f"removing {node}")
instance_graph.remove_node(node)
# add metadata to the graphs
meets = [
"^".join(sorted(combo))
for combo in list(itertools.combinations(abstract_nodes, 2))
]
granularities = abstract_nodes + meets
counts = {
granularity: len(
[
node[1]["area"]
for node in instance_graph.nodes(data=True)
if node[1]["type"] == granularity
]
)
for granularity in granularities
}
areas = {
granularity: sum(
[
node[1]["area"]
for node in instance_graph.nodes(data=True)
if node[1]["type"] == granularity
]
)
for granularity in granularities
}
metadata = {
"id": str(uuid.uuid4()),
"projection": projection,
"granularities": abstract_nodes,
"minimum_intersect_area": minimum_intersection_area,
"nodes": len(instance_graph.nodes()),
"links": len(instance_graph.edges()),
"counts": counts,
"areas": areas,
}
abstract_graph.graph = metadata
instance_graph.graph = metadata
print(metadata)
# save the instance graph with its geometries (very large)
if save_shapes:
with open(
"{}/instance-graph_{}_{}_{}_{}_shapes.geojson".format(
save_dir,
"-".join(abstract_nodes),
projection,
minimum_intersection_area,
config["tag"],
),
mode="w",
) as outfile:
geojson.dump(json_graph.node_link_data(instance_graph), outfile)
# remove geometries from the instance graph (much smaller)
instance_graph_noshapes = json_graph.node_link_data(instance_graph)
for node in instance_graph_noshapes["nodes"]:
if "shape" in node:
del node["shape"]
# save graphs to JSON files
with open(
"{}/abstract-graph_{}_{}_{}_{}.geojson".format(
save_dir,
"-".join(abstract_nodes),
projection,
minimum_intersection_area,
config["tag"],
),
mode="w",
) as outfile:
geojson.dump(json_graph.node_link_data(abstract_graph), outfile)
with open(
"{}/instance-graph_{}_{}_{}_{}.geojson".format(
save_dir,
"-".join(abstract_nodes),
projection,
minimum_intersection_area,
config["tag"],
),
mode="w",
) as outfile:
geojson.dump(instance_graph_noshapes, outfile)
print("done building graphs")
end = datetime.now()
print(end)
print(end - start)