From 471ebf7c31ce60b756d791e1f13db0988152299d Mon Sep 17 00:00:00 2001 From: Koji Shiromoto Date: Fri, 9 Aug 2019 12:55:39 -0400 Subject: [PATCH 1/3] added new geom --- .gitignore | 1 + _posts/ggplot2/2019-08-08-geom_count.Rmd | 175 +++++++++++++++++++++++ _posts/ggplot2/2019-08-08-geom_count.md | 171 ++++++++++++++++++++++ 3 files changed, 347 insertions(+) create mode 100644 _posts/ggplot2/2019-08-08-geom_count.Rmd create mode 100644 _posts/ggplot2/2019-08-08-geom_count.md diff --git a/.gitignore b/.gitignore index 1172d4bb959c..df0309e989a8 100755 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ vendor/bundle _posts/python/fundamentals/static-image/images _config_personal.yml _posts/python/html +.Rproj.user diff --git a/_posts/ggplot2/2019-08-08-geom_count.Rmd b/_posts/ggplot2/2019-08-08-geom_count.Rmd new file mode 100644 index 000000000000..1934656da276 --- /dev/null +++ b/_posts/ggplot2/2019-08-08-geom_count.Rmd @@ -0,0 +1,175 @@ +--- +title: geom_count | Examples | Plotly +name: geom_count +permalink: ggplot2/geom_count/ +description: How to make a 2-dimensional frequency graph in ggplot2 using geom_count. Examples of coloured and facetted graphs. +layout: base +thumbnail: thumbnail/geom_count.jpg +language: ggplot2 +page_type: example_index +has_thumbnail: true +display_as: statistical +order: 2 +output: + html_document: + keep_md: true +--- + +```{r, echo = FALSE, message=FALSE} +knitr::opts_chunk$set(message = FALSE, warning=FALSE) +Sys.setenv("plotly_username"="RPlotBot") +Sys.setenv("plotly_api_key"="q0lz6r5efr") +``` + +### New to Plotly? + +Plotly's R library is free and open source!
+[Get started](https://plot.ly/r/getting-started/) by downloading the client and [reading the primer](https://plot.ly/r/getting-started/).
+You can set up Plotly to work in [online](https://plot.ly/r/getting-started/#hosting-graphs-in-your-online-plotly-account) or [offline](https://plot.ly/r/offline/) mode.
+We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/r_cheat_sheet.pdf) (new!) to help you get started! + +### Version Check + +Version 4 of Plotly's R package is now [available](https://plot.ly/r/getting-started/#installation)!
+Check out [this post](http://moderndata.plot.ly/upgrading-to-plotly-4-0-and-above/) for more information on breaking changes and new features available in this version. + +```{r} +library(plotly) +packageVersion('plotly') +``` + +### Basic geom\_count Plot +geom\_count is a way to plot two variables that are not continuous. Here's a modified version of the nycflights13 dataset that comes with R; it shows 2013 domestic flights leaving New York's three airports. This graph maps two categorical variables: which of America's major airports it was headed to, and which major carrier was operating it. + +It's good to show the ful airport names for destinations, rather than just the airport codes. You can use aes(group = ), which doesn't modify the graph in any way but adds information to the labels. + +```{r, results='hide'} +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) + +library(plotly) + +p <- ggplot(flightdata, aes(y=airline, x=dest, colour = dest, group=airport)) + + geom_count(alpha=0.5) + + labs(title = "Flights from New York to major domestic destinations", + x = "Origin and destination", + y = "Airline", + size = "") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/basic-plot") +chart_link +``` + +```{r echo=FALSE} +chart_link +``` + +### Adding a Third Variable +By using facets, we can add a third variable: which of New York's three airports it departed from. We can also colour-code by this variable. + +```{r, results='hide'} +library(plotly) + +p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + + geom_count(alpha=0.5) + + facet_grid(. 
~ dest) + + labs(title = "Flights from New York to major domestic destinations", + x = "Origin and destination", + y = "Airline", + size = "") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/three-variables") +chart_link +``` + +```{r echo=FALSE} +chart_link +``` + +### Customized appearance +The airport labels at the bottom aren't very visible and aren't very important, since there's a colour key to the side; we can get rid of the text and ticks using theme() options. Let's also use the LaCroixColoR package to give this geom\_count chart a new colour scheme. + +```{r, results='hide'} +library(plotly) +library(LaCroixColoR) + +p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + + geom_count(alpha=0.5) + + facet_grid(. ~ dest) + + scale_colour_manual(values = lacroix_palette("PassionFruit", n=3)) + + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank()) + + labs(title = "Flights from New York to major domestic destinations", + x = "Origin and destination", + y = "Airline", + size = "") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/customize-theme") +chart_link +``` + +```{r echo=FALSE} +chart_link +``` + +### geom\_count vs geom\_point +Here's a comparison of geom\_count and geom\_point on the same dataset (rounded for geom\_count). Geom\_point has the advantage of allowing multiple colours on the same graph, as well as a label for each point. But even with a low alpha, there are too many overlapping points to see what the actual distribution looks like; you only get a general impression. 
+ +```{r, results='hide'} +library(plotly) +library(dplyr) + +beers <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/beers.csv", stringsAsFactors = FALSE) +df <- beers %>% + mutate(abv = round(abv*100), + ibu = round(ibu/10)*10) %>% + filter(!is.na(style2)) + +p <- ggplot(df, aes(x=abv, y=ibu, colour=style2)) + + geom_count(alpha=0.5) + + theme(legend.position = "none") + + facet_wrap(~style2) +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/compare-count") +chart_link +``` + +```{r echo=FALSE} +chart_link +``` + +```{r, results='hide'} +library(plotly) +library(dplyr) + +df <- filter(beers, !is.na(style2)) + +p <- ggplot(df, aes(x=abv, y=ibu, colour=style2)) + + geom_point(alpha=0.2, aes(text = label)) + + theme(legend.position = "none") + + facet_wrap(~style2) + + labs(y = "bitterness (IBU)", + x = "alcohol volume (ABV)", + title = "Craft beers from American breweries") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/compare-point") +chart_link +``` + +```{r echo=FALSE} +chart_link +``` diff --git a/_posts/ggplot2/2019-08-08-geom_count.md b/_posts/ggplot2/2019-08-08-geom_count.md new file mode 100644 index 000000000000..374ee0bf70f6 --- /dev/null +++ b/_posts/ggplot2/2019-08-08-geom_count.md @@ -0,0 +1,171 @@ +--- +title: geom_count | Examples | Plotly +name: geom_count +permalink: ggplot2/geom_count/ +description: How to make a 2-dimensional frequency graph in ggplot2 using geom_count. Examples of coloured and facetted graphs. +layout: base +thumbnail: thumbnail/geom_count.jpg +language: ggplot2 +page_type: example_index +has_thumbnail: true +display_as: statistical +order: 2 +output: + html_document: + keep_md: true +--- + + + +### New to Plotly? + +Plotly's R library is free and open source!
+[Get started](https://plot.ly/r/getting-started/) by downloading the client and [reading the primer](https://plot.ly/r/getting-started/).
+You can set up Plotly to work in [online](https://plot.ly/r/getting-started/#hosting-graphs-in-your-online-plotly-account) or [offline](https://plot.ly/r/offline/) mode.
+We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/r_cheat_sheet.pdf) (new!) to help you get started! + +### Version Check + +Version 4 of Plotly's R package is now [available](https://plot.ly/r/getting-started/#installation)!
+Check out [this post](http://moderndata.plot.ly/upgrading-to-plotly-4-0-and-above/) for more information on breaking changes and new features available in this version. + + +```r +library(plotly) +packageVersion('plotly') +``` + +``` +## [1] '4.8.0.9000' +``` + +### Basic geom\_count Plot +geom\_count is a way to plot two variables that are not continuous. Here's a modified version of the nycflights13 dataset that comes with R; it shows 2013 domestic flights leaving New York's three airports. This graph maps two categorical variables: which of America's major airports it was headed to, and which major carrier was operating it. + +It's good to show the full airport names for destinations, rather than just the airport codes. You can use aes(group = ), which doesn't modify the graph in any way but adds information to the labels. + + +```r +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) + +library(plotly) + +p <- ggplot(flightdata, aes(y=airline, x=dest, colour = dest, group=airport)) + + geom_count(alpha=0.5) + + labs(title = "Flights from New York to major domestic destinations", + x = "Origin and destination", + y = "Airline", + size = "") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/basic-plot") +chart_link +``` + + + +### Adding a Third Variable +By using facets, we can add a third variable: which of New York's three airports it departed from. We can also colour-code by this variable. + + +```r +library(plotly) + +p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + + geom_count(alpha=0.5) + + facet_grid(. 
~ dest) + + labs(title = "Flights from New York to major domestic destinations", + x = "Origin and destination", + y = "Airline", + size = "") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/three-variables") +chart_link +``` + + + +### Customized appearance +The airport labels at the bottom aren't very visible and aren't very important, since there's a colour key to the side; we can get rid of the text and ticks using theme() options. Let's also use the LaCroixColoR package to give this geom\_count chart a new colour scheme. + + +```r +library(plotly) +library(LaCroixColoR) + +p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + + geom_count(alpha=0.5) + + facet_grid(. ~ dest) + + scale_colour_manual(values = lacroix_palette("PassionFruit", n=3)) + + theme(axis.text.x = element_blank(), + axis.ticks.x = element_blank()) + + labs(title = "Flights from New York to major domestic destinations", + x = "Origin and destination", + y = "Airline", + size = "") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/customize-theme") +chart_link +``` + + + +### geom\_count vs geom\_point +Here's a comparison of geom\_count and geom\_point on the same dataset (rounded for geom\_count). Geom\_point has the advantage of allowing multiple colours on the same graph, as well as a label for each point. But even with a low alpha, there are too many overlapping points to see what the actual distribution looks like; you only get a general impression. 
+ + +```r +library(plotly) +library(dplyr) + +beers <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/beers.csv", stringsAsFactors = FALSE) +df <- beers %>% + mutate(abv = round(abv*100), + ibu = round(ibu/10)*10) %>% + filter(!is.na(style2)) + +p <- ggplot(df, aes(x=abv, y=ibu, colour=style2)) + + geom_count(alpha=0.5) + + theme(legend.position = "none") + + facet_wrap(~style2) +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/compare-count") +chart_link +``` + + + + +```r +library(plotly) +library(dplyr) + +df <- filter(beers, !is.na(style2)) + +p <- ggplot(df, aes(x=abv, y=ibu, colour=style2)) + + geom_point(alpha=0.2, aes(text = label)) + + theme(legend.position = "none") + + facet_wrap(~style2) + + labs(y = "bitterness (IBU)", + x = "alcohol volume (ABV)", + title = "Craft beers from American breweries") +ggplotly(p) + +# Create a shareable link to your chart +# Set up API credentials: https://plot.ly/r/getting-started +chart_link = api_create(p, filename="geom_count/compare-point") +chart_link +``` + + From 19287474f900fa7aee07736946fa560ea1d020d3 Mon Sep 17 00:00:00 2001 From: Koji Shiromoto Date: Mon, 12 Aug 2019 18:06:14 -0400 Subject: [PATCH 2/3] edited --- _posts/ggplot2/2019-08-08-geom_count.Rmd | 8 +++++--- _posts/ggplot2/2019-08-08-geom_count.md | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/_posts/ggplot2/2019-08-08-geom_count.Rmd b/_posts/ggplot2/2019-08-08-geom_count.Rmd index 1934656da276..cc635d7475be 100644 --- a/_posts/ggplot2/2019-08-08-geom_count.Rmd +++ b/_posts/ggplot2/2019-08-08-geom_count.Rmd @@ -44,9 +44,8 @@ geom\_count is a way to plot two variables that are not continuous. Here's a mod It's good to show the ful airport names for destinations, rather than just the airport codes. 
You can use aes(group = ), which doesn't modify the graph in any way but adds information to the labels. ```{r, results='hide'} -flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) - library(plotly) +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) p <- ggplot(flightdata, aes(y=airline, x=dest, colour = dest, group=airport)) + geom_count(alpha=0.5) + @@ -71,6 +70,7 @@ By using facets, we can add a third variable: which of New York's three airports ```{r, results='hide'} library(plotly) +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + geom_count(alpha=0.5) + @@ -97,6 +97,7 @@ The airport labels at the bottom aren't very visible and aren't very important, ```{r, results='hide'} library(plotly) library(LaCroixColoR) +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + geom_count(alpha=0.5) + @@ -126,8 +127,8 @@ Here's a comparison of geom\_count and geom\_point on the same dataset (rounded ```{r, results='hide'} library(plotly) library(dplyr) - beers <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/beers.csv", stringsAsFactors = FALSE) + df <- beers %>% mutate(abv = round(abv*100), ibu = round(ibu/10)*10) %>% @@ -152,6 +153,7 @@ chart_link ```{r, results='hide'} library(plotly) library(dplyr) +beers <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/beers.csv", stringsAsFactors = FALSE) df <- filter(beers, !is.na(style2)) diff --git a/_posts/ggplot2/2019-08-08-geom_count.md b/_posts/ggplot2/2019-08-08-geom_count.md index 374ee0bf70f6..4492d503c8a4 100644 --- 
a/_posts/ggplot2/2019-08-08-geom_count.md +++ b/_posts/ggplot2/2019-08-08-geom_count.md @@ -46,9 +46,8 @@ It's good to show the ful airport names for destinations, rather than just the a ```r -flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) - library(plotly) +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) p <- ggplot(flightdata, aes(y=airline, x=dest, colour = dest, group=airport)) + geom_count(alpha=0.5) + @@ -72,6 +71,7 @@ By using facets, we can add a third variable: which of New York's three airports ```r library(plotly) +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + geom_count(alpha=0.5) + @@ -97,6 +97,7 @@ The airport labels at the bottom aren't very visible and aren't very important, ```r library(plotly) library(LaCroixColoR) +flightdata <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/flightdata.csv", stringsAsFactors = FALSE) p <- ggplot(flightdata, aes(y=airline, x=origin, colour=origin, group=airport)) + geom_count(alpha=0.5) + @@ -125,8 +126,8 @@ Here's a comparison of geom\_count and geom\_point on the same dataset (rounded ```r library(plotly) library(dplyr) - beers <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/beers.csv", stringsAsFactors = FALSE) + df <- beers %>% mutate(abv = round(abv*100), ibu = round(ibu/10)*10) %>% @@ -150,6 +151,7 @@ chart_link ```r library(plotly) library(dplyr) +beers <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/beers.csv", stringsAsFactors = FALSE) df <- filter(beers, !is.na(style2)) From 8f0690c85be4fb57ebcb8b73c17c62de4671139a Mon Sep 17 00:00:00 2001 From: bobidou23 <51970305+bobidou23@users.noreply.github.com> Date: Sun, 18 Aug 2019 17:27:15 
-0400 Subject: [PATCH 3/3] correct typo Co-Authored-By: michaelbabyn <41019918+michaelbabyn@users.noreply.github.com> --- _posts/ggplot2/2019-08-08-geom_count.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_posts/ggplot2/2019-08-08-geom_count.Rmd b/_posts/ggplot2/2019-08-08-geom_count.Rmd index cc635d7475be..40a4cdb85011 100644 --- a/_posts/ggplot2/2019-08-08-geom_count.Rmd +++ b/_posts/ggplot2/2019-08-08-geom_count.Rmd @@ -41,7 +41,7 @@ packageVersion('plotly') ### Basic geom\_count Plot geom\_count is a way to plot two variables that are not continuous. Here's a modified version of the nycflights13 dataset that comes with R; it shows 2013 domestic flights leaving New York's three airports. This graph maps two categorical variables: which of America's major airports it was headed to, and which major carrier was operating it. -It's good to show the ful airport names for destinations, rather than just the airport codes. You can use aes(group = ), which doesn't modify the graph in any way but adds information to the labels. +It's good to show the full airport names for destinations, rather than just the airport codes. You can use aes(group = ), which doesn't modify the graph in any way but adds information to the labels. ```{r, results='hide'} library(plotly)