From 93010ace6dc16f2bc3a798a4f9e0ac3b1458156f Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 3 Feb 2020 11:06:22 -0500 Subject: [PATCH 1/4] datashader tutorial --- doc/python/datashader.md | 124 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 doc/python/datashader.md diff --git a/doc/python/datashader.md b/doc/python/datashader.md new file mode 100644 index 00000000000..e39a446a0f5 --- /dev/null +++ b/doc/python/datashader.md @@ -0,0 +1,124 @@ +--- +jupyter: + jupytext: + notebook_metadata_filter: all + text_representation: + extension: .md + format_name: markdown + format_version: '1.2' + jupytext_version: 1.3.0 + kernelspec: + display_name: Python 3 + language: python + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.7.3 + plotly: + description: How to use datashader to rasterize large datasets, and visualize + the generated raster data with plotly. + display_as: scientific + language: python + layout: base + name: Using datashader with plotly + order: 21 + page_type: u-guide + permalink: python/datashader/ + thumbnail: thumbnail/datashader.jpg +--- + +[datashader](https://datashader.org/) creates rasterized representations of large datasets for easier visualization, with a pipeline approach consisting of several steps: projecting the data on a regular grid, creating a color representation of the grid, etc. + +### Passing datashader rasters as a mabox image layer + +We visualize here the spatial distribution of taxi rides in New York City. A higher density +is observed on major avenues. For more details about mapbox charts, see [the mapbox layers tutorial](/python/mapbox-layers). No mapbox token is needed here. 
+ +```python +import pandas as pd +df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/uber-rides-data1.csv') +dff = df.query('Lat < 40.82').query('Lat > 40.70').query('Lon > -74.02').query('Lon < -73.91') + +import datashader as ds +cvs = ds.Canvas(plot_width=1000, plot_height=1000) +agg = cvs.points(dff, x='Lon', y='Lat') +# agg is an xarray object, see http://xarray.pydata.org/en/stable/ for more details +coords_lat, coords_lon = agg.coords['Lat'].values, agg.coords['Lon'].values +# Corners of the image, which need to be passed to mapbox +coordinates = [[coords_lon[0], coords_lat[0]], + [coords_lon[-1], coords_lat[0]], + [coords_lon[-1], coords_lat[-1]], + [coords_lon[0], coords_lat[-1]]] + +from colorcet import fire +import datashader.transfer_functions as tf +img = tf.shade(agg, cmap=fire)[::-1].to_pil() + +import plotly.express as px +# Trick to create rapidly a figure with mapbox axes +fig = px.scatter_mapbox(dff[:1], lat='Lat', lon='Lon', zoom=12) +# Add the datashader image as a mapbox layer image +fig.update_layout(mapbox_style="carto-darkmatter", + mapbox_layers = [ + { + "sourcetype": "image", + "source": img, + "coordinates": coordinates + }] +) +fig.show() +``` + +### Exploring correlations of a large dataset + +Here we explore the flight delay dataset from https://www.kaggle.com/usdot/flight-delays. In order to get a visual impression of the correlation between features, we generate a datashader rasterized array which we plot using a `Heatmap` trace. It creates a much clearer visualization than a scatter plot of (even a fraction of) the data points, as shown below. + +Note that instead of datashader it would theoretically be possible to create a [2d histogram](/python/2d-histogram-contour/) with plotly but this is not recommended here because you would need to load the whole dataset (5M rows !) in the browser for plotly.js to compute the heatmap, which is practically not tractable. 
Datashader offers the possibility to reduce the size of the dataset before passing it to the browser. + +```python +import plotly.graph_objects as go +import pandas as pd +import numpy as np +import datashader as ds +df = pd.read_parquet('https://raw.githubusercontent.com/plotly/datasets/master/2015_flights.parquet') +fig = go.Figure(go.Scattergl(x=df['SCHEDULED_DEPARTURE'][::200], + y=df['DEPARTURE_DELAY'][::200], + mode='markers') +) +fig.update_layout(title_text='A busy plot') +fig.show() +``` + +```python +import plotly.graph_objects as go +import pandas as pd +import numpy as np +import datashader as ds +df = pd.read_parquet('https://raw.githubusercontent.com/plotly/datasets/master/2015_flights.parquet') + +cvs = ds.Canvas(plot_width=100, plot_height=100) +agg = cvs.points(df, 'SCHEDULED_DEPARTURE', 'DEPARTURE_DELAY') +x = np.array(agg.coords['SCHEDULED_DEPARTURE']) +y = np.array(agg.coords['DEPARTURE_DELAY']) + +# Assign nan to zero values so that the corresponding pixels are transparent +agg = np.array(agg.values, dtype=np.float) +agg[agg<1] = np.nan + +fig = go.Figure(go.Heatmap( + z=np.log10(agg), x=x, y=y, + hoverongaps=False, + hovertemplate='Scheduled departure: %{x:.1f}h
Depature delay: %{y}
Log10(Count): %{z}', + colorbar=dict(title='Count (Log)', tickprefix='1.e'))) +fig.update_xaxes(title_text='Scheduled departure') +fig.update_yaxes(title_text='Departure delay') +fig.show() + +``` From b6fb0e7a25d647146cb96674ec2b7512403302d5 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 3 Feb 2020 11:17:00 -0500 Subject: [PATCH 2/4] added requirements --- binder/requirements.txt | 1 + doc/requirements.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/binder/requirements.txt b/binder/requirements.txt index 52868c42d97..d63486e33ec 100644 --- a/binder/requirements.txt +++ b/binder/requirements.txt @@ -12,3 +12,4 @@ psutil requests networkx scikit-image +datashader diff --git a/doc/requirements.txt b/doc/requirements.txt index 988f05efdbb..68a6f06b2b8 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -22,3 +22,4 @@ sphinx_bootstrap_theme recommonmark pathlib python-frontmatter +datashader From b55d142bc320e82fc9a8b37d91fbb739ae033685 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 3 Feb 2020 14:35:31 -0500 Subject: [PATCH 3/4] CI fixup --- binder/requirements.txt | 1 + doc/python/datashader.md | 25 +++++++++++++++---------- doc/requirements.txt | 1 + 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/binder/requirements.txt b/binder/requirements.txt index d63486e33ec..d59fb34283f 100644 --- a/binder/requirements.txt +++ b/binder/requirements.txt @@ -13,3 +13,4 @@ requests networkx scikit-image datashader +pyarrow diff --git a/doc/python/datashader.md b/doc/python/datashader.md index e39a446a0f5..b36b1a39488 100644 --- a/doc/python/datashader.md +++ b/doc/python/datashader.md @@ -5,8 +5,8 @@ jupyter: text_representation: extension: .md format_name: markdown - format_version: '1.2' - jupytext_version: 1.3.0 + format_version: "1.2" + jupytext_version: 1.3.1 kernelspec: display_name: Python 3 language: python @@ -20,26 +20,27 @@ jupyter: name: python nbconvert_exporter: python pygments_lexer: ipython3 - 
version: 3.7.3 + version: 3.6.8 plotly: - description: How to use datashader to rasterize large datasets, and visualize + description: + How to use datashader to rasterize large datasets, and visualize the generated raster data with plotly. display_as: scientific language: python layout: base - name: Using datashader with plotly + name: Plotly and Datashader order: 21 page_type: u-guide permalink: python/datashader/ - thumbnail: thumbnail/datashader.jpg + thumbnail: thumbnail/heatmap_colorscale.jpg --- -[datashader](https://datashader.org/) creates rasterized representations of large datasets for easier visualization, with a pipeline approach consisting of several steps: projecting the data on a regular grid, creating a color representation of the grid, etc. +[datashader](https://datashader.org/) creates rasterized representations of large datasets for easier visualization, with a pipeline approach consisting of several steps: projecting the data on a regular grid, creating a color representation of the grid, etc. ### Passing datashader rasters as a mabox image layer We visualize here the spatial distribution of taxi rides in New York City. A higher density -is observed on major avenues. For more details about mapbox charts, see [the mapbox layers tutorial](/python/mapbox-layers). No mapbox token is needed here. +is observed on major avenues. For more details about mapbox charts, see [the mapbox layers tutorial](/python/mapbox-layers). No mapbox token is needed here. 
```python import pandas as pd @@ -88,7 +89,7 @@ import pandas as pd import numpy as np import datashader as ds df = pd.read_parquet('https://raw.githubusercontent.com/plotly/datasets/master/2015_flights.parquet') -fig = go.Figure(go.Scattergl(x=df['SCHEDULED_DEPARTURE'][::200], +fig = go.Figure(go.Scattergl(x=df['SCHEDULED_DEPARTURE'][::200], y=df['DEPARTURE_DELAY'][::200], mode='markers') ) @@ -113,7 +114,7 @@ agg = np.array(agg.values, dtype=np.float) agg[agg<1] = np.nan fig = go.Figure(go.Heatmap( - z=np.log10(agg), x=x, y=y, + z=np.log10(agg), x=x, y=y, hoverongaps=False, hovertemplate='Scheduled departure: %{x:.1f}h
Depature delay: %{y}
Log10(Count): %{z}', colorbar=dict(title='Count (Log)', tickprefix='1.e'))) @@ -122,3 +123,7 @@ fig.update_yaxes(title_text='Departure delay') fig.show() ``` + +```python + +``` diff --git a/doc/requirements.txt b/doc/requirements.txt index 68a6f06b2b8..37d0944f8f2 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -23,3 +23,4 @@ recommonmark pathlib python-frontmatter datashader +pyarrow From 1e87d6ee54b45326e0ce12481bc8598a086de08e Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 3 Feb 2020 15:37:02 -0500 Subject: [PATCH 4/4] icon + links --- doc/python/datashader.md | 2 +- doc/python/heatmaps.md | 5 +++++ doc/python/imshow.md | 7 +++++++ doc/python/mapbox-layers.md | 4 ++++ doc/python/webgl-vs-svg.md | 4 ++++ 5 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/python/datashader.md b/doc/python/datashader.md index b36b1a39488..9a9a3ba9f52 100644 --- a/doc/python/datashader.md +++ b/doc/python/datashader.md @@ -32,7 +32,7 @@ jupyter: order: 21 page_type: u-guide permalink: python/datashader/ - thumbnail: thumbnail/heatmap_colorscale.jpg + thumbnail: thumbnail/datashader.jpg --- [datashader](https://datashader.org/) creates rasterized representations of large datasets for easier visualization, with a pipeline approach consisting of several steps: projecting the data on a regular grid, creating a color representation of the grid, etc. diff --git a/doc/python/heatmaps.md b/doc/python/heatmaps.md index aac6a570455..ec2735bf725 100644 --- a/doc/python/heatmaps.md +++ b/doc/python/heatmaps.md @@ -162,5 +162,10 @@ fig.update_layout( fig.show() ``` +### Heatmap and datashader + +Arrays of rasterized values built by datashader can be visualized using +plotly's heatmaps, as shown in the [plotly and datashader tutorial](/python/datashader/). + #### Reference See https://plot.ly/python/reference/#heatmap for more information and chart attribute options! 
diff --git a/doc/python/imshow.md b/doc/python/imshow.md index 349732e82a7..ba5b0c20cb7 100644 --- a/doc/python/imshow.md +++ b/doc/python/imshow.md @@ -198,6 +198,13 @@ fig.update_layout(height=400) fig.show() ``` +### imshow and datashader + +Arrays of rasterized values built by datashader can be visualized using +imshow. See the [plotly and datashader tutorial](/python/datashader/) for +examples on how to use plotly and datashader. + + #### Reference See https://plot.ly/python/reference/#image for more information and chart attribute options! diff --git a/doc/python/mapbox-layers.md b/doc/python/mapbox-layers.md index 0aac4981c1a..ecb68d5cf3b 100644 --- a/doc/python/mapbox-layers.md +++ b/doc/python/mapbox-layers.md @@ -186,6 +186,10 @@ fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}) fig.show() ``` +#### Using a mapbox image layer to display a datashader raster image + +See the example in the [plotly and datashader tutorial](/python/datashader/). + #### Reference See https://plot.ly/python/reference/#layout-mapbox for more information and options! diff --git a/doc/python/webgl-vs-svg.md b/doc/python/webgl-vs-svg.md index 7b30d51527c..ff11ace247f 100644 --- a/doc/python/webgl-vs-svg.md +++ b/doc/python/webgl-vs-svg.md @@ -33,6 +33,10 @@ jupyter: thumbnail: thumbnail/webgl.jpg --- +Here we show that it is possible to represent millions of points with WebGL. +For larger datasets, or for a clearer visualization of the density of points, +it is also possible to use [datashader](/python/datashader/). + #### Compare WebGL and SVG Checkout [this notebook](https://plot.ly/python/compare-webgl-svg) to compare WebGL and SVG scatter plots with 75,000 random data points