update notebook

Former-commit-id: e3e56624943670abf758a0d86414e7251610ed9c
This commit is contained in:
Michael T. Kelbaugh
2020-03-16 17:43:28 -04:00
parent f004d64bc2
commit 21e54e7bf8

View File

@@ -13,11 +13,11 @@
"metadata": {},
"outputs": [],
"source": [
"json_file = \"demo/gfdl_cm3.json\" # path to the Mongo document to plot\n",
"shapefile_dir = \"../graphs/shapefiles/\" # path to the directory of shapefiles\n",
"plot_width = 1200 # pixel width of the plot\n",
"plot_height = 800 # pixel height of the plot\n",
"projection = 4326 # EPSG code of the coordinate reference system to use for plotting (also try 3085)"
]
},
{
@@ -33,18 +33,65 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import numpy as np\n",
"from shapely.geometry.polygon import Polygon\n",
"from shapely.geometry.multipolygon import MultiPolygon\n",
"from geopandas import read_file\n",
"import pandas as pd\n",
"import json\n",
"\n",
"from bokeh.io import show\n",
"from bokeh.models import LogColorMapper\n",
"from bokeh.palettes import Blues256 as palette\n",
"palette.reverse()\n",
"from bokeh.plotting import figure, output_file, save"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# color palettes\n",
"\n",
"from bokeh.palettes import Purples256\n",
"from bokeh.palettes import Blues256\n",
"from bokeh.palettes import Greens256\n",
"from bokeh.palettes import Oranges256\n",
"from bokeh.palettes import Reds256\n",
"from bokeh.palettes import Greys256\n",
"from bokeh.palettes import Blues256\n",
"from bokeh.palettes import Inferno256\n",
"from bokeh.palettes import Magma256\n",
"from bokeh.palettes import Plasma256\n",
"from bokeh.palettes import Viridis256\n",
"from bokeh.palettes import Cividis256\n",
"from bokeh.palettes import Turbo256\n",
"\n",
"from bokeh.palettes import PuOr11\n",
"from bokeh.palettes import BrBG11\n",
"from bokeh.palettes import PRGn11\n",
"from bokeh.palettes import PiYG11\n",
"from bokeh.palettes import RdBu11\n",
"from bokeh.palettes import RdGy11\n",
"from bokeh.palettes import RdYlBu11\n",
"from bokeh.palettes import Spectral11\n",
"from bokeh.palettes import RdYlGn11\n",
"\n",
"from bokeh.palettes import YlGn9\n",
"from bokeh.palettes import YlGnBu9\n",
"from bokeh.palettes import GnBu9\n",
"from bokeh.palettes import BuGn9\n",
"from bokeh.palettes import PuBuGn9\n",
"from bokeh.palettes import PuBu9\n",
"from bokeh.palettes import BuPu9\n",
"from bokeh.palettes import RdPu9\n",
"from bokeh.palettes import PuRd9\n",
"from bokeh.palettes import OrRd9\n",
"from bokeh.palettes import YlOrRd9\n",
"from bokeh.palettes import YlOrBr9"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -63,62 +110,55 @@
"metadata": {},
"outputs": [],
"source": [
"def getXYCoords(geometry, coord_type):\n",
" # Returns either x or y coordinates from geometry coordinate sequence. Used with LineString and Polygon geometries.\"\"\"\n",
"def get_xy_coords(geometry, coord_type):\n",
"    \"\"\"\n",
"    Return the x or y coordinates of a geometry's coordinate sequence as a list.\n",
"\n",
"    geometry must expose `.coords.xy` (e.g. the exterior ring of a Polygon).\n",
"    coord_type selects the axis and must be 'x' or 'y'.\n",
"\n",
"    Raises ValueError for any other coord_type instead of silently returning None.\n",
"    \"\"\"\n",
"    axis_index = {'x': 0, 'y': 1}\n",
"    if coord_type not in axis_index:\n",
"        raise ValueError(f\"coord_type must be 'x' or 'y', got {coord_type!r}\")\n",
"    return list(geometry.coords.xy[axis_index[coord_type]])\n",
"\n",
"def getPolyCoords(geometry, coord_type):\n",
" # Returns Coordinates of Polygon using the Exterior of the Polygon.\"\"\"\n",
" ext = geometry.exterior\n",
" return getXYCoords(ext, coord_type)\n",
"\n",
"def multiGeomHandler(multi_geometry, coord_type, geom_type):\n",
"def get_poly_coords(geometry, coord_type):\n",
"    \"\"\"\n",
"    Return the 'x' or 'y' coordinates of a Polygon's exterior ring.\n",
"\n",
"    Only `geometry.exterior` is read; interior rings are not included.\n",
"    \"\"\"\n",
"    exterior_ring = geometry.exterior\n",
"    return get_xy_coords(exterior_ring, coord_type)\n",
"\n",
"\n",
"def multi_geom_handler(multi_geometry, coord_type):\n",
"    \"\"\"\n",
"    Flatten the exterior coordinates of a MultiPolygon into a single array.\n",
"\n",
"    The 'x' or 'y' coordinates (selected by coord_type) of every part are\n",
"    concatenated, and each part is followed by np.nan, which is how Bokeh\n",
"    wants the individual polygons of a multi-geometry to be separated.\n",
"    Bokeh discussion of the multi-geometry issue:\n",
"    https://github.com/bokeh/bokeh/issues/2321\n",
"\n",
"    Returns a 1-D numpy array; it is empty when the geometry has no parts.\n",
"    \"\"\"\n",
"    # Iterating a multi-part geometry directly is deprecated in Shapely 1.8 and\n",
"    # removed in 2.0; the parts are exposed through `.geoms`. Fall back to the\n",
"    # object itself so a plain iterable of polygons keeps working.\n",
"    parts = getattr(multi_geometry, 'geoms', multi_geometry)\n",
"    all_poly_coords = [\n",
"        np.append(get_poly_coords(part, coord_type), np.nan) for part in parts\n",
"    ]\n",
"    if not all_poly_coords:\n",
"        # np.concatenate raises ValueError on an empty sequence\n",
"        return np.array([], dtype=float)\n",
"    return np.concatenate(all_poly_coords)\n",
"\n",
"\n",
"def get_coords(row, coord_type):\n",
"    \"\"\"\n",
"    Return the 'x' or 'y' exterior coordinates of the geometry stored in a row.\n",
"\n",
"    row['geometry'] is dispatched on its type: a Polygon yields the list from\n",
"    get_poly_coords, a MultiPolygon yields the nan-separated array from\n",
"    multi_geom_handler. Any other geometry type yields None.\n",
"    \"\"\"\n",
"    geometry = row['geometry']\n",
"\n",
"    # get coords from a single polygon\n",
"    if isinstance(geometry, Polygon):\n",
"        return get_poly_coords(geometry, coord_type)\n",
"    # get coords from multiple polygons\n",
"    if isinstance(geometry, MultiPolygon):\n",
"        return multi_geom_handler(geometry, coord_type)\n",
"    # unsupported geometry type: same result as the implicit fallthrough, made explicit\n",
"    return None"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## plot data on the shapefile\n",
"## merge data with the shapefile\n",
"### references:\n",
"\n",
"https://docs.bokeh.org/en/latest/docs/gallery/texas.html"
@@ -130,54 +170,94 @@
"metadata": {},
"outputs": [],
"source": [
"def plot_mongo_doc(data, projection=4326, save_fig=True, show_fig=True):\n",
" \n",
"def plot_mongo_doc(data, shapefile_dir=\".\", palette=Blues256.reverse(), projection=4326, plot_width=1200, plot_height=800, show_fig=False, save_fig=True):\n",
"\n",
" df = {}\n",
" geographies = {}\n",
" datasets = data['payload'].keys()\n",
" \n",
"\n",
" for dataset in datasets:\n",
"\n",
" # get data\n",
" \n",
" granularity = data['payload'][dataset]['granularity']\n",
" print(f\"dataset: {dataset}, granularity: {granularity}\")\n",
" if not granularity:\n",
" print(f\"skipping {dataset} (does not have a granularity specified)\")\n",
" continue\n",
" else:\n",
" print(f\"plotting {dataset} (granularity: {granularity})\")\n",
" instance_col_name = 'ID'\n",
" year = data['year']\n",
"\n",
" df[dataset] = pd.DataFrame.from_dict(data['payload'][dataset]['data'], orient='index', columns=[f\"{dataset}_value\"])\n",
" df[dataset] = pd.DataFrame.from_dict(\n",
" data['payload'][dataset]['data'],\n",
" orient='index',\n",
" columns=[f\"{dataset}_value\"],\n",
" )\n",
" df[dataset][instance_col_name] = df[dataset].index\n",
"\n",
" geographies[dataset] = read_file(f\"{shapefile_dir}/{granularity}.shp\").to_crs(epsg=projection)\n",
" geographies[dataset] = geographies[dataset].merge(df[dataset], on=instance_col_name)\n",
" \n",
" \n",
" # merge data with the shapefile\n",
" \n",
" shapefile_path = f\"{shapefile_dir}/{granularity}.shp\"\n",
" if os.path.exists(shapefile_path):\n",
" geographies[dataset] = read_file(shapefile_path).to_crs(epsg=projection)\n",
" else:\n",
" print(f\"{shapefile_path} not found, skipping\")\n",
" continue\n",
" geographies[dataset] = geographies[dataset].merge(\n",
" df[dataset], on=instance_col_name\n",
" )\n",
" geographies[dataset]['x'] = geographies[dataset].apply(\n",
" get_coords, coord_type='x', axis=1\n",
" )\n",
" geographies[dataset]['y'] = geographies[dataset].apply(\n",
" get_coords, coord_type='y', axis=1\n",
" )\n",
" \n",
" \n",
" # create figure\n",
"\n",
" # reset the color palette\n",
" color_mapper = LogColorMapper(palette=palette)\n",
"\n",
" geographies[dataset]['x'] = geographies[dataset].apply(get_coords, coord_type='x', axis=1)\n",
" geographies[dataset]['y'] = geographies[dataset].apply(get_coords, coord_type='y', axis=1)\n",
"\n",
" plot_data=dict(\n",
" plot_data = dict(\n",
" x=geographies[dataset]['x'].tolist(),\n",
" y=geographies[dataset]['y'].tolist(),\n",
" name=geographies[dataset]['ID'].tolist(),\n",
" value=geographies[dataset][f\"{dataset}_value\"].tolist()\n",
" value=geographies[dataset][f\"{dataset}_value\"].tolist(),\n",
" )\n",
"\n",
" TOOLS = \"pan,wheel_zoom,reset,hover,save,box_zoom\"\n",
"\n",
" coords_tuple = (\"(Lat, Lon)\", \"($y, $x)\") if projection == 4326 else (\"(x, y)\", \"($x, $y)\")\n",
" \n",
" coords_tuple = (\n",
" (\"(Lat, Lon)\", \"($y, $x)\")\n",
" if projection == 4326\n",
" else (\"(x, y)\", \"($x, $y)\")\n",
" )\n",
" \n",
" fig = figure(\n",
" title=f\"USA {dataset} ({year})\", tools=TOOLS,\n",
" plot_width=plot_width, plot_height=plot_height,\n",
" x_axis_location=None, y_axis_location=None,\n",
" tooltips=[\n",
" (\"Name\", \"@name\"), (\"Value\", \"@value\"), coords_tuple\n",
" ])\n",
" title=f\"USA {dataset} ({year})\",\n",
" tools=TOOLS,\n",
" plot_width=plot_width,\n",
" plot_height=plot_height,\n",
" x_axis_location=None,\n",
" y_axis_location=None,\n",
" tooltips=[(\"Name\", \"@name\"), (\"Value\", \"@value\"), coords_tuple],\n",
" )\n",
" fig.grid.grid_line_color = None\n",
" fig.hover.point_policy = \"follow_mouse\"\n",
"\n",
" fig.patches('x', 'y', source=plot_data,\n",
" fill_color={'field': 'value', 'transform': color_mapper},\n",
" fill_alpha=0.7, line_color=\"white\", line_width=0.5)\n",
" # reset the color palette\n",
" color_mapper = LogColorMapper(palette=palette)\n",
" \n",
" fig.patches(\n",
" 'x',\n",
" 'y',\n",
" source=plot_data,\n",
" fill_color={'field': 'value', 'transform': color_mapper},\n",
" fill_alpha=0.7,\n",
" line_color=\"white\",\n",
" line_width=0.5,\n",
" )\n",
"\n",
" if save_fig:\n",
" output_file(f\"{year}_{dataset}.html\")\n",
@@ -202,7 +282,10 @@
"with open(json_file) as f:\n",
"    data = json.load(f)\n",
"\n",
"# Build a reversed copy instead of calling .reverse(): that mutates the imported\n",
"# palette in place (re-running this cell would flip it back) and is not available\n",
"# when the palette is an immutable tuple.\n",
"palette = PuBu9[::-1]\n",
"\n",
"plot_mongo_doc(data, palette=palette, shapefile_dir=shapefile_dir, projection=projection, show_fig=True)"
]
}
],
@@ -222,7 +305,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.6.9"
}
},
"nbformat": 4,