mirror of
https://github.com/JHUAPL/SIMoN.git
synced 2026-01-08 22:37:56 -05:00
408 lines
13 KiB
Python
408 lines
13 KiB
Python
# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory LLC
|
|
# All rights reserved.
|
|
# Distributed under the terms of the MIT License.
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
start = datetime.now()
|
|
print(start)
|
|
|
|
from geopandas import read_file, sjoin
|
|
import networkx as nx
|
|
from networkx.readwrite import json_graph
|
|
import geojson
|
|
|
|
import itertools
|
|
from collections import defaultdict
|
|
import uuid
|
|
import os
|
|
import json
|
|
|
|
with open(os.path.join(os.path.dirname(__file__), "config.json")) as f:
|
|
config = json.load(f)
|
|
|
|
# where to save the graphs
|
|
save_dir = os.path.join(os.path.dirname(__file__), "out")
|
|
|
|
# where the shapefiles are stored
|
|
shapefile_dir = os.path.join(os.path.dirname(__file__), "shapefiles")
|
|
|
|
# the coordinate reference system the shapefiles are defined on
|
|
projection = config["projection"]
|
|
|
|
# scale the areas calculated for shapefile geometries from square kilometers to square meters
|
|
scale_factor = config["scale_factor"]
|
|
|
|
# the minimum area of a meet node (a node formed by the interestion of two disparate geograophic granularities), in square kilometers
|
|
minimum_intersection_area = config["minimum_intersection_area"]
|
|
|
|
# Define the abstract graph with a list of tuples with the form (source, destination), where source is a higher lower resolution granularity that encompasses destination, a higher resolution granularity.
|
|
abstract_edges = config["abstract_edges"]
|
|
|
|
# Save the instance graph with its geometries included. This will create a very large graph.
|
|
save_shapes = config["save_shapes"]
|
|
|
|
# open the shapefiles for each granularity
|
|
states = read_file(f"{shapefile_dir}/state.shp").to_crs(epsg=projection)
|
|
counties = read_file(f"{shapefile_dir}/county.shp").to_crs(epsg=projection)
|
|
nercs = read_file(f"{shapefile_dir}/nerc.shp").to_crs(epsg=projection)
|
|
huc8s = read_file(f"{shapefile_dir}/huc8.shp").to_crs(epsg=projection)
|
|
latlons = read_file(f"{shapefile_dir}/latlon.shp").to_crs(epsg=projection)
|
|
|
|
# nerc regions
|
|
nercs["country"] = nercs.apply(lambda x: 1, axis=1)
|
|
nercs["area"] = nercs.apply(lambda row: row["geometry"].area, axis=1)
|
|
|
|
# nerc shapefile has the smallest scope, so it is used to define the scope of the USA
|
|
country = nercs.dissolve(by="country", aggfunc={"area": "sum"})
|
|
country["NAME"] = "usa48"
|
|
|
|
# counties
|
|
counties["county_polygons"] = counties.geometry.copy()
|
|
counties.geometry = counties.geometry.representative_point()
|
|
counties = sjoin(counties, states, how="inner", op="within")
|
|
counties.geometry = counties["county_polygons"]
|
|
counties.drop("index_right", axis=1, inplace=True)
|
|
counties.rename(columns={"ID_left": "ID"}, inplace=True)
|
|
counties.rename(columns={"NAME_left": "NAME"}, inplace=True)
|
|
counties.rename(columns={"ID_right": "parent_ID"}, inplace=True)
|
|
|
|
# latitude-longitude grid squares
|
|
latlons["parent_ID"] = "usa48"
|
|
huc8s["parent_ID"] = "usa48"
|
|
nercs["parent_ID"] = "usa48"
|
|
states["parent_ID"] = "usa48"
|
|
|
|
# each shapefile should have 4 attributes: ID, NAME, parent_ID, geometry
|
|
shapefiles = {}
|
|
shapefiles["state"] = states
|
|
shapefiles["county"] = counties
|
|
shapefiles["nerc"] = nercs
|
|
shapefiles["huc8"] = huc8s
|
|
shapefiles["latlon"] = latlons
|
|
|
|
for granularity, shapefile in shapefiles.items():
|
|
for column in ["ID", "NAME", "geometry", "parent_ID"]:
|
|
if column not in shapefile.columns:
|
|
print(
|
|
f"WARNING: required column {column} not in shapefile {granularity}"
|
|
)
|
|
|
|
# display the graph
|
|
def draw_graph(graph, display=False):
|
|
|
|
if display:
|
|
nx.draw(graph, labels={node: node for node in graph.nodes()})
|
|
|
|
print("{} nodes:".format(len(graph.nodes())))
|
|
print("{} edges:".format(len(graph.edges())))
|
|
|
|
|
|
# construct a graph from a list of edges
|
|
def build_graph(original_edges, is_abstract=False):
|
|
graph = nx.DiGraph()
|
|
graph.is_abstract = is_abstract
|
|
for edge in original_edges:
|
|
if is_abstract:
|
|
graph.add_edge(*edge)
|
|
else:
|
|
graph.add_edge(*edge)
|
|
return graph
|
|
|
|
|
|
# returns the name of a wedge node, based on its parents
|
|
def meet(a, b):
|
|
sort = sorted((a, b))
|
|
return "{}^{}".format(sort[0], sort[1])
|
|
|
|
|
|
# add wedge nodes to an abstact graph
|
|
def add_abstract_wedges(abstract_graph, original_nodes):
|
|
for combo in list(itertools.combinations(original_nodes, 2)):
|
|
if not (
|
|
nx.has_path(abstract_graph, combo[0], combo[1])
|
|
or nx.has_path(abstract_graph, combo[1], combo[0])
|
|
):
|
|
new_node = meet(combo[0], combo[1])
|
|
abstract_graph.add_edge(combo[0], new_node)
|
|
abstract_graph.add_edge(combo[1], new_node)
|
|
|
|
|
|
# add wedge nodes to an instance graph
|
|
def add_instance_wedges(graph, combos, instance_graph_types):
|
|
|
|
for combo in combos:
|
|
|
|
check_intersection = graph.nodes[combo[0]]["shape"].intersects(
|
|
graph.nodes[combo[1]]["shape"]
|
|
) and not graph.nodes[combo[0]]["shape"].touches(
|
|
graph.nodes[combo[1]]["shape"]
|
|
)
|
|
if not check_intersection:
|
|
continue
|
|
|
|
try:
|
|
|
|
shape = graph.nodes[combo[1]]["shape"].intersection(
|
|
graph.nodes[combo[0]]["shape"]
|
|
)
|
|
area = shape.area / scale_factor
|
|
|
|
new_node = meet(combo[0], combo[1])
|
|
if area >= minimum_intersection_area:
|
|
graph.add_edge(combo[0], new_node)
|
|
graph.add_edge(combo[1], new_node)
|
|
instance_graph_types[
|
|
meet(
|
|
graph.nodes[combo[0]]["type"],
|
|
graph.nodes[combo[1]]["type"],
|
|
)
|
|
].append(new_node)
|
|
graph.nodes[new_node]["shape"] = shape
|
|
graph.nodes[new_node]["area"] = area
|
|
graph.nodes[new_node]["type"] = meet(
|
|
graph.nodes[combo[0]]["type"],
|
|
graph.nodes[combo[1]]["type"],
|
|
)
|
|
|
|
else:
|
|
pass
|
|
# print(f"{new_node} is too small to be added. area = {area}")
|
|
|
|
except Exception as e:
|
|
print(
|
|
"ERROR: could not calculate intersection of {} with {}: {}".format(
|
|
combo[0], combo[1], e
|
|
)
|
|
)
|
|
if not graph.nodes[combo[0]]["shape"].is_valid:
|
|
print(
|
|
f"WARNING: {combo[0]} has invalid geometry, area = {graph.nodes[combo[0]]['shape'].area/scale_factor}"
|
|
)
|
|
# graph.remove_node(combo[0])
|
|
# print(f"removed {combo[0]} from graph")
|
|
if not graph.nodes[combo[1]]["shape"].is_valid:
|
|
print(
|
|
f"WARNING: {combo[1]} has invalid geometry, area = {graph.nodes[combo[1]]['shape'].area/scale_factor}"
|
|
)
|
|
# graph.remove_node(combo[1])
|
|
# print(f"removed {combo[1]} from graph")
|
|
|
|
return instance_graph_types
|
|
|
|
|
|
# construct an instance graph
|
|
def build_instance_graph(root):
|
|
instance_graph_types = defaultdict(list)
|
|
instance_graph = build_graph([])
|
|
instance_graph.add_node(root, name=root, type=root, shape=None, area=None)
|
|
|
|
abstract_nodes_bfs = [root] + [
|
|
v for u, v in nx.bfs_edges(abstract_graph, root)
|
|
]
|
|
for node in abstract_nodes_bfs:
|
|
for child in abstract_graph.successors(node):
|
|
shapefile = shapefiles[child]
|
|
for index, row in shapefile.iterrows():
|
|
ID = str(row["ID"])
|
|
shape = row["geometry"]
|
|
area = row["geometry"].area / scale_factor
|
|
name = row["NAME"]
|
|
if not shape.is_valid:
|
|
print(
|
|
f"WARNING: instance node {name} has invalid geometry. area = {area}"
|
|
)
|
|
if area < minimum_intersection_area:
|
|
print(
|
|
f"WARNING: instance node {name} is smaller than minimum intersection area. area = {area}"
|
|
)
|
|
instance_graph.add_node(
|
|
ID, name=name, type=child, shape=shape, area=area
|
|
)
|
|
instance_graph_types[child].append(ID)
|
|
instance_graph.add_edge(str(row["parent_ID"]), ID)
|
|
|
|
instance_graph.add_node(
|
|
root,
|
|
name=root,
|
|
type=root,
|
|
shape=None,
|
|
area=float(country.area) / scale_factor,
|
|
)
|
|
return instance_graph, instance_graph_types
|
|
|
|
|
|
# build the abstract graph
|
|
abstract_graph = build_graph(abstract_edges, is_abstract=True)
|
|
root = list(nx.topological_sort(abstract_graph))[0]
|
|
abstract_nodes = [root] + [v for u, v in nx.bfs_edges(abstract_graph, root)]
|
|
|
|
# build the instance graph
|
|
instance_graph, instance_graph_types = build_instance_graph(root)
|
|
|
|
# add wedges to the abstract graph
|
|
add_abstract_wedges(abstract_graph, abstract_nodes)
|
|
|
|
# iterate through the abstract graph wedges, in BFS order
|
|
abstract_graph_wedges = [
|
|
v for u, v in nx.bfs_edges(abstract_graph, root) if v not in abstract_nodes
|
|
]
|
|
for wedge in abstract_graph_wedges:
|
|
l, r = wedge.split("^")
|
|
|
|
parents = [
|
|
parent
|
|
for parent in abstract_graph.predecessors(wedge)
|
|
if parent in abstract_graph_wedges
|
|
]
|
|
|
|
if parents:
|
|
parent = sorted(
|
|
parents, key=lambda node: len(instance_graph_types[node])
|
|
)[0]
|
|
|
|
ll, rr = parent.split("^")
|
|
combos = []
|
|
|
|
for instance in instance_graph_types[parent]:
|
|
instance_l, instance_r = instance.split("^")
|
|
|
|
if l == instance_graph.nodes[instance_l].get("type"):
|
|
for element in instance_graph.successors(instance_r):
|
|
if instance_graph.nodes[element].get("type") == r:
|
|
combos.append((instance_l, element))
|
|
|
|
elif r == instance_graph.nodes[instance_r].get("type"):
|
|
for element in instance_graph.successors(instance_l):
|
|
if instance_graph.nodes[element].get("type") == l:
|
|
combos.append((element, instance_r))
|
|
|
|
elif l == instance_graph.nodes[instance_r].get("type"):
|
|
for element in instance_graph.successors(instance_l):
|
|
if instance_graph.nodes[element].get("type") == r:
|
|
combos.append((element, instance_r))
|
|
|
|
elif r == instance_graph.nodes[instance_l].get("type"):
|
|
for element in instance_graph.successors(instance_r):
|
|
if instance_graph.nodes[element].get("type") == l:
|
|
combos.append((instance_l, element))
|
|
|
|
else:
|
|
print(f"ERROR: no match for instance {instance}")
|
|
|
|
else:
|
|
combos = list(
|
|
itertools.product(
|
|
*[instance_graph_types[l], instance_graph_types[r]]
|
|
)
|
|
)
|
|
|
|
instance_graph_types = add_instance_wedges(
|
|
instance_graph, combos, instance_graph_types
|
|
)
|
|
|
|
# remove nodes without neighbors
|
|
no_neighbors = set(
|
|
[
|
|
node[0]
|
|
for node in instance_graph.nodes(data=True)
|
|
if node[1]["type"] in abstract_nodes
|
|
and not list(instance_graph.neighbors(node[0]))
|
|
]
|
|
)
|
|
if no_neighbors:
|
|
print(f"removing non-wedge nodes without children: {no_neighbors}")
|
|
for node in no_neighbors:
|
|
print(f"removing {node}")
|
|
instance_graph.remove_node(node)
|
|
|
|
# add metadata to the graphs
|
|
meets = [
|
|
"^".join(sorted(combo))
|
|
for combo in list(itertools.combinations(abstract_nodes, 2))
|
|
]
|
|
granularities = abstract_nodes + meets
|
|
counts = {
|
|
granularity: len(
|
|
[
|
|
node[1]["area"]
|
|
for node in instance_graph.nodes(data=True)
|
|
if node[1]["type"] == granularity
|
|
]
|
|
)
|
|
for granularity in granularities
|
|
}
|
|
areas = {
|
|
granularity: sum(
|
|
[
|
|
node[1]["area"]
|
|
for node in instance_graph.nodes(data=True)
|
|
if node[1]["type"] == granularity
|
|
]
|
|
)
|
|
for granularity in granularities
|
|
}
|
|
metadata = {
|
|
"id": str(uuid.uuid4()),
|
|
"projection": projection,
|
|
"granularities": abstract_nodes,
|
|
"minimum_intersect_area": minimum_intersection_area,
|
|
"nodes": len(instance_graph.nodes()),
|
|
"links": len(instance_graph.edges()),
|
|
"counts": counts,
|
|
"areas": areas,
|
|
}
|
|
abstract_graph.graph = metadata
|
|
instance_graph.graph = metadata
|
|
print(metadata)
|
|
|
|
# save the instance graph with its geometries (very large)
|
|
if save_shapes:
|
|
with open(
|
|
"{}/instance-graph_{}_{}_{}_{}_shapes.geojson".format(
|
|
save_dir,
|
|
"-".join(abstract_nodes),
|
|
projection,
|
|
minimum_intersection_area,
|
|
config["tag"],
|
|
),
|
|
mode="w",
|
|
) as outfile:
|
|
geojson.dump(json_graph.node_link_data(instance_graph), outfile)
|
|
|
|
# remove geometries from the instance graph (much smaller)
|
|
instance_graph_noshapes = json_graph.node_link_data(instance_graph)
|
|
for node in instance_graph_noshapes["nodes"]:
|
|
if "shape" in node:
|
|
del node["shape"]
|
|
|
|
# save graphs to JSON files
|
|
with open(
|
|
"{}/abstract-graph_{}_{}_{}_{}.geojson".format(
|
|
save_dir,
|
|
"-".join(abstract_nodes),
|
|
projection,
|
|
minimum_intersection_area,
|
|
config["tag"],
|
|
),
|
|
mode="w",
|
|
) as outfile:
|
|
geojson.dump(json_graph.node_link_data(abstract_graph), outfile)
|
|
with open(
|
|
"{}/instance-graph_{}_{}_{}_{}.geojson".format(
|
|
save_dir,
|
|
"-".join(abstract_nodes),
|
|
projection,
|
|
minimum_intersection_area,
|
|
config["tag"],
|
|
),
|
|
mode="w",
|
|
) as outfile:
|
|
geojson.dump(instance_graph_noshapes, outfile)
|
|
|
|
print("done building graphs")
|
|
end = datetime.now()
|
|
print(end)
|
|
print(end - start)
|