Bokeh visualizations

Former-commit-id: 8955e62d6a66da2b2e39a58380be9c0959c08f60
This commit is contained in:
Michael T. Kelbaugh
2020-02-12 15:09:42 -05:00
parent d5655f753e
commit 4d50963c6f
27 changed files with 433 additions and 0 deletions

Binary file not shown.

View File

@@ -0,0 +1 @@
PROJCS["NAD83_HARN_Texas_Centric_Albers_Equal_Area",GEOGCS["GCS_NAD83(HARN)",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_origin",18],PARAMETER["central_meridian",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["Meter",1]]

View File

@@ -0,0 +1 @@
PROJCS["NAD83(HARN) / Texas Centric Albers Equal Area",GEOGCS["NAD83(HARN)",DATUM["NAD83_High_Accuracy_Reference_Network",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6152"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4152"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_center",18],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","3085"]]

View File

@@ -0,0 +1 @@
2a8f074a67f33bda86d85297b7fea83090e64a37

Binary file not shown.

View File

@@ -0,0 +1 @@
66b583ac6076324bafaac49f29679adbfba9764d

View File

@@ -0,0 +1 @@
PROJCS["NAD83_HARN_Texas_Centric_Albers_Equal_Area",GEOGCS["GCS_NAD83(HARN)",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_origin",18],PARAMETER["central_meridian",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["Meter",1]]

View File

@@ -0,0 +1 @@
PROJCS["NAD83(HARN) / Texas Centric Albers Equal Area",GEOGCS["NAD83(HARN)",DATUM["NAD83_High_Accuracy_Reference_Network",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6152"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4152"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_center",18],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","3085"]]

View File

@@ -0,0 +1 @@
9fbc0f21af568b1a94c561cd1d9a6e6ec6a41836

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
PROJCS["NAD83_HARN_Texas_Centric_Albers_Equal_Area",GEOGCS["GCS_NAD83(HARN)",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_origin",18],PARAMETER["central_meridian",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["Meter",1]]

View File

@@ -0,0 +1 @@
PROJCS["NAD83(HARN) / Texas Centric Albers Equal Area",GEOGCS["NAD83(HARN)",DATUM["NAD83_High_Accuracy_Reference_Network",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6152"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4152"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_center",18],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","3085"]]

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
PROJCS["NAD83_HARN_Texas_Centric_Albers_Equal_Area",GEOGCS["GCS_NAD83(HARN)",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_origin",18],PARAMETER["central_meridian",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["Meter",1]]

View File

@@ -0,0 +1 @@
PROJCS["NAD83(HARN) / Texas Centric Albers Equal Area",GEOGCS["NAD83(HARN)",DATUM["NAD83_High_Accuracy_Reference_Network",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6152"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4152"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_center",18],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","3085"]]

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
PROJCS["NAD83_HARN_Texas_Centric_Albers_Equal_Area",GEOGCS["GCS_NAD83(HARN)",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_origin",18],PARAMETER["central_meridian",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["Meter",1]]

View File

@@ -0,0 +1 @@
PROJCS["NAD83(HARN) / Texas Centric Albers Equal Area",GEOGCS["NAD83(HARN)",DATUM["NAD83_High_Accuracy_Reference_Network",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6152"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4152"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",27.5],PARAMETER["standard_parallel_2",35],PARAMETER["latitude_of_center",18],PARAMETER["longitude_of_center",-100],PARAMETER["false_easting",1500000],PARAMETER["false_northing",6000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","3085"]]

Binary file not shown.

Binary file not shown.

229
viz/Plot.ipynb Normal file
View File

@@ -0,0 +1,229 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Plot data on a choropleth map"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"json_file = \"path/to/mongo_doc.json\" # path to the Mongo document to plot\n",
"shapefile_dir = \"path/to/shapefiles/\" # path to the directory of shapefiles\n",
"plot_width = 1200 # pixel width of the plot\n",
"plot_height = 800 # pixel height of the plot\n",
"projection = 4326 # coordinate reference system to use for plotting"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## import packages"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from geopandas import read_file\n",
"import pandas as pd\n",
"import json\n",
"\n",
"from bokeh.io import show\n",
"from bokeh.models import LogColorMapper\n",
"from bokeh.palettes import Blues256 as palette\n",
"palette.reverse()\n",
"from bokeh.plotting import figure, output_file, save"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## geopandas functions for getting coordinates\n",
"### references:\n",
"\n",
"https://automating-gis-processes.github.io/2016/Lesson5-interactive-map-bokeh.html\n",
"\n",
"https://discourse.bokeh.org/t/mapping-europe-with-bokeh-using-geopandas-and-handling-multipolygons/2571"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def getXYCoords(geometry, coord_type):\n",
" # Returns either x or y coordinates from geometry coordinate sequence. Used with LineString and Polygon geometries.\n",
" if coord_type == 'x':\n",
" return list(geometry.coords.xy[0])\n",
" elif coord_type == 'y':\n",
" return list(geometry.coords.xy[1])\n",
"\n",
"def getPolyCoords(geometry, coord_type):\n",
" # Returns the coordinates of a Polygon using the exterior of the Polygon.\n",
" ext = geometry.exterior\n",
" return getXYCoords(ext, coord_type)\n",
"\n",
"def multiGeomHandler(multi_geometry, coord_type, geom_type):\n",
" \"\"\"\n",
" Function for handling multi-geometries. Can be MultiPoint, MultiLineString or MultiPolygon.\n",
" Returns a list of coordinates where all parts of Multi-geometries are merged into a single list.\n",
" Individual geometries are separated with np.nan which is how Bokeh wants them.\n",
" # Bokeh documentation regarding the Multi-geometry issues can be found here (it is an open issue)\n",
" # https://github.com/bokeh/bokeh/issues/2321\n",
" \"\"\"\n",
"\n",
" for i, part in enumerate(multi_geometry):\n",
" # On the first part of the Multi-geometry initialize the coord_array (np.array)\n",
" if i == 0:\n",
" if geom_type == \"MultiPoint\":\n",
" coord_arrays = np.append(getPointCoords(part, coord_type), np.nan)\n",
" elif geom_type == \"MultiLineString\":\n",
" coord_arrays = np.append(getLineCoords(part, coord_type), np.nan)\n",
" elif geom_type == \"MultiPolygon\":\n",
" coord_arrays = np.append(getPolyCoords(part, coord_type), np.nan)\n",
" else:\n",
" if geom_type == \"MultiPoint\":\n",
" coord_arrays = np.concatenate([coord_arrays, np.append(getPointCoords(part, coord_type), np.nan)])\n",
" elif geom_type == \"MultiLineString\":\n",
" coord_arrays = np.concatenate([coord_arrays, np.append(getLineCoords(part, coord_type), np.nan)])\n",
" elif geom_type == \"MultiPolygon\":\n",
" coord_arrays = np.concatenate([coord_arrays, np.append(getPolyCoords(part, coord_type), np.nan)])\n",
"\n",
" # Return the coordinates\n",
" return coord_arrays\n",
"\n",
"def get_coords(row, coord_type):\n",
" \"\"\"Returns the coordinates ('x' or 'y') of edges of a Polygon exterior\"\"\"\n",
" try:\n",
" # plot a single polygon\n",
" return getPolyCoords(row['geometry'], coord_type)\n",
" except Exception as e:\n",
" # plot multiple polygons\n",
" return multiGeomHandler(row['geometry'], coord_type, 'MultiPolygon')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## plot data on the shapefile\n",
"### references:\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/texas.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_mongo_doc(data, projection=4326, save_fig=True, show_fig=True):\n",
" \n",
" df = {}\n",
" geographies = {}\n",
" datasets = data['payload'].keys()\n",
" \n",
" for dataset in datasets:\n",
" \n",
" granularity = data['payload'][dataset]['granularity']\n",
" print(f\"dataset: {dataset}, granularity: {granularity}\")\n",
" instance_col_name = 'ID'\n",
"\n",
" df[dataset] = pd.DataFrame.from_dict(data['payload'][dataset]['data'], orient='index', columns=[f\"{dataset}_value\"])\n",
" df[dataset][instance_col_name] = df[dataset].index\n",
"\n",
" geographies[dataset] = read_file(f\"{shapefile_dir}/simple1000_clipped_{granularity}.shp\").to_crs(epsg=projection)\n",
" geographies[dataset] = geographies[dataset].merge(df[dataset], on=instance_col_name)\n",
"\n",
" # reset the color palette\n",
" color_mapper = LogColorMapper(palette=palette)\n",
"\n",
" geographies[dataset]['x'] = geographies[dataset].apply(get_coords, coord_type='x', axis=1)\n",
" geographies[dataset]['y'] = geographies[dataset].apply(get_coords, coord_type='y', axis=1)\n",
"\n",
" plot_data=dict(\n",
" x=geographies[dataset]['x'].tolist(),\n",
" y=geographies[dataset]['y'].tolist(),\n",
" name=geographies[dataset]['ID'].tolist(),\n",
" value=geographies[dataset][f\"{dataset}_value\"].tolist()\n",
" )\n",
"\n",
" TOOLS = \"pan,wheel_zoom,reset,hover,save,box_zoom\"\n",
"\n",
" coords_tuple = (\"(Lat, Lon)\", \"($y, $x)\") if projection == 4326 else (\"(x, y)\", \"($x, $y)\")\n",
" fig = figure(\n",
" title=f\"USA {dataset}\", tools=TOOLS,\n",
" plot_width=plot_width, plot_height=plot_height,\n",
" x_axis_location=None, y_axis_location=None,\n",
" tooltips=[\n",
" (\"Name\", \"@name\"), (\"Value\", \"@value\"), coords_tuple\n",
" ])\n",
" fig.grid.grid_line_color = None\n",
" fig.hover.point_policy = \"follow_mouse\"\n",
"\n",
" fig.patches('x', 'y', source=plot_data,\n",
" fill_color={'field': 'value', 'transform': color_mapper},\n",
" fill_alpha=0.7, line_color=\"white\", line_width=0.5)\n",
"\n",
" if save_fig:\n",
" output_file(f\"{dataset}.html\")\n",
" save(fig)\n",
" if show_fig:\n",
" show(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## load and plot the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(json_file) as f:\n",
" data = json.load(f)\n",
"\n",
"plot_mongo_doc(data, projection=projection, show_fig=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

191
viz/plot.py Normal file
View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python
# coding: utf-8
# Plot data on a choropleth map
# import packages
import sys
import numpy as np
from geopandas import read_file
import pandas as pd
import json
from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Blues256 as palette
palette.reverse()
from bokeh.plotting import figure, output_file, save
# Script parameters: module-level settings used by the code below.
json_file = sys.argv[1]  # path to the Mongo document (JSON) to plot; taken from the first command-line argument
shapefile_dir = "../graphs/shapefiles/"  # directory holding the simple1000_clipped_<granularity>.shp shapefiles
plot_width = 1200  # pixel width of the plot (passed to bokeh's figure)
plot_height = 800  # pixel height of the plot (passed to bokeh's figure)
projection = 4326  # EPSG code of the coordinate reference system the shapes are reprojected to for plotting
# geopandas functions for getting coordinates
# references:
# https://automating-gis-processes.github.io/2016/Lesson5-interactive-map-bokeh.html
# https://discourse.bokeh.org/t/mapping-europe-with-bokeh-using-geopandas-and-handling-multipolygons/2571
def getXYCoords(geometry, coord_type):
    """Return the x or y coordinates of a geometry's coordinate sequence.

    Used with LineString and Polygon (ring) geometries.

    Args:
        geometry: object exposing ``coords.xy``, a pair holding the x
            sequence at index 0 and the y sequence at index 1.
        coord_type: ``'x'`` or ``'y'``.

    Returns:
        A list with the requested coordinates, or ``None`` when
        ``coord_type`` is neither ``'x'`` nor ``'y'``.
    """
    if coord_type not in ('x', 'y'):
        return None
    axis = 0 if coord_type == 'x' else 1
    return list(geometry.coords.xy[axis])
def getPolyCoords(geometry, coord_type):
    """Return the x or y coordinates of a Polygon's exterior ring.

    Args:
        geometry: object exposing an ``exterior`` attribute (the outer ring).
        coord_type: ``'x'`` or ``'y'``, forwarded to ``getXYCoords``.

    Returns:
        Whatever ``getXYCoords`` returns for the exterior ring.
    """
    return getXYCoords(geometry.exterior, coord_type)
def _point_coords(geometry, coord_type):
    """Return the x or y coordinate of a single Point geometry."""
    if coord_type == 'x':
        return geometry.x
    if coord_type == 'y':
        return geometry.y
    raise ValueError(f"coord_type must be 'x' or 'y', got {coord_type!r}")


def _line_coords(geometry, coord_type):
    """Return the x or y coordinates of a single LineString geometry."""
    return getXYCoords(geometry, coord_type)


def multiGeomHandler(multi_geometry, coord_type, geom_type):
    """Merge the coordinates of every part of a multi-geometry into one array.

    Handles MultiPoint, MultiLineString and MultiPolygon geometries. The
    coordinates of each part are followed by ``np.nan``, which is the
    separator Bokeh expects between the parts of a multi-geometry (see
    https://github.com/bokeh/bokeh/issues/2321).

    Args:
        multi_geometry: a multi-part geometry exposing its parts through
            ``.geoms``, or any plain iterable of single geometries.
        coord_type: ``'x'`` or ``'y'``.
        geom_type: ``"MultiPoint"``, ``"MultiLineString"`` or
            ``"MultiPolygon"``.

    Returns:
        A 1-D ``np.ndarray`` with the coordinates of all parts, each part
        terminated by ``np.nan``; an empty array when there are no parts.

    Raises:
        ValueError: if ``geom_type`` is not one of the supported names.
    """
    # An if/elif chain (rather than a dict) so only the extractor actually
    # needed for this geom_type is looked up.
    if geom_type == "MultiPoint":
        extract = _point_coords
    elif geom_type == "MultiLineString":
        extract = _line_coords
    elif geom_type == "MultiPolygon":
        extract = getPolyCoords
    else:
        raise ValueError(f"unsupported geom_type: {geom_type!r}")

    # Multi-part geometries are not iterable in Shapely 2.x; their parts are
    # reached through ``.geoms``. Plain iterables are still accepted as-is.
    parts = getattr(multi_geometry, "geoms", multi_geometry)

    # Collect every part first and concatenate once, instead of growing the
    # array with a fresh copy on each iteration.
    pieces = [np.append(extract(part, coord_type), np.nan) for part in parts]
    if not pieces:
        return np.array([])
    return np.concatenate(pieces)
def get_coords(row, coord_type):
    """Return the 'x' or 'y' exterior coordinates for a row's geometry.

    Args:
        row: mapping (e.g. a GeoDataFrame row) with a ``'geometry'`` entry.
        coord_type: ``'x'`` or ``'y'``.

    Returns:
        The exterior coordinates of a single polygon, or, for a geometry
        without an ``exterior`` attribute, the merged coordinates produced
        by ``multiGeomHandler`` for a MultiPolygon.
    """
    geometry = row['geometry']
    try:
        # plot a single polygon
        return getPolyCoords(geometry, coord_type)
    except AttributeError:
        # No ``exterior`` attribute: plot multiple polygons. Only this
        # specific failure is caught so unrelated errors are not masked.
        return multiGeomHandler(geometry, coord_type, 'MultiPolygon')
# plot data on the shapefile
# references:
# https://docs.bokeh.org/en/latest/docs/gallery/texas.html
def plot_mongo_doc(data, projection=4326, save_fig=True, show_fig=True):
    """Draw one choropleth map per dataset found in a Mongo document.

    For every entry of ``data['payload']`` the values are joined (on the
    ``'ID'`` column) to the shapefile matching the entry's granularity, and
    the resulting polygons are coloured by value on a Bokeh figure.

    Reads the module-level ``shapefile_dir``, ``plot_width``,
    ``plot_height`` and ``palette``.

    Args:
        data: dict with a ``'payload'`` mapping; each payload entry holds a
            ``'granularity'`` and a ``'data'`` mapping.
        projection: EPSG code the shapes are reprojected to before plotting.
        save_fig: when true, save the figure to ``<dataset>.html``.
        show_fig: when true, call Bokeh's ``show`` on the figure.
    """
    id_column = 'ID'
    tool_names = "pan,wheel_zoom,reset,hover,save,box_zoom"

    # Tooltip row for the cursor position; labelled Lat/Lon only for EPSG 4326.
    if projection == 4326:
        coords_tuple = ("(Lat, Lon)", "($y, $x)")
    else:
        coords_tuple = ("(x, y)", "($x, $y)")

    for dataset, entry in data['payload'].items():
        granularity = entry['granularity']
        print(f"dataset: {dataset}, granularity: {granularity}")
        value_column = f"{dataset}_value"

        # One row per ID, with the ID duplicated into a regular column so it
        # can serve as the merge key.
        values = pd.DataFrame.from_dict(
            entry['data'], orient='index', columns=[value_column]
        )
        values[id_column] = values.index

        shapes = read_file(
            f"{shapefile_dir}/simple1000_clipped_{granularity}.shp"
        )
        shapes = shapes.to_crs(epsg=projection).merge(values, on=id_column)

        # reset the color palette
        color_mapper = LogColorMapper(palette=palette)

        shapes['x'] = shapes.apply(get_coords, coord_type='x', axis=1)
        shapes['y'] = shapes.apply(get_coords, coord_type='y', axis=1)

        plot_data = {
            'x': shapes['x'].tolist(),
            'y': shapes['y'].tolist(),
            'name': shapes[id_column].tolist(),
            'value': shapes[value_column].tolist(),
        }

        fig = figure(
            title=f"USA {dataset}",
            tools=tool_names,
            plot_width=plot_width,
            plot_height=plot_height,
            x_axis_location=None,
            y_axis_location=None,
            tooltips=[("Name", "@name"), ("Value", "@value"), coords_tuple],
        )
        fig.grid.grid_line_color = None
        fig.hover.point_policy = "follow_mouse"

        fig.patches(
            'x',
            'y',
            source=plot_data,
            fill_color={'field': 'value', 'transform': color_mapper},
            fill_alpha=0.7,
            line_color="white",
            line_width=0.5,
        )

        if save_fig:
            output_file(f"{dataset}.html")
            save(fig)
        if show_fig:
            show(fig)
# load and plot the data
# The encoding is named explicitly so that decoding the JSON document does
# not depend on the platform's locale default.
with open(json_file, encoding="utf-8") as f:
    data = json.load(f)
# Figures are saved to <dataset>.html (save_fig defaults to True) but not shown.
plot_mongo_doc(data, projection=projection, show_fig=False)