From 0f04bc5db3a565d0c914a23d52da37c2f87ec71f Mon Sep 17 00:00:00 2001
From: clifmckee <clifton.mckee@gmail.com>
Date: Mon, 6 Jan 2025 23:56:37 -0500
Subject: [PATCH 1/5] Address #589

---
 modules/Data_Summarization/Data_Summarization.Rmd    |  6 +++---
 .../lab/Data_Summarization_Lab.Rmd                   | 12 ++++++------
 .../lab/Data_Summarization_Lab_Key.Rmd               | 12 ++++++------
 3 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/modules/Data_Summarization/Data_Summarization.Rmd b/modules/Data_Summarization/Data_Summarization.Rmd
index 8679900a6..8152a08cd 100644
--- a/modules/Data_Summarization/Data_Summarization.Rmd
+++ b/modules/Data_Summarization/Data_Summarization.Rmd
@@ -220,8 +220,8 @@ You can also do more elaborate summaries across different groups of data using `
 ```{r, eval = FALSE}
 # General format - Not the code!
 {data to use} %>% 
-   summarize({summary column name} = {operator(source column)},
-             {summary column name} = {operator(source column)}) 
+   summarize({summary column name} = {function(source column)},
+             {summary column name} = {function(source column)}) 
 ```
 </div>
 
@@ -234,7 +234,7 @@ You can also do more elaborate summaries across different groups of data using `
 ```{r, eval = FALSE}
 # General format - Not the code!
 {data to use} %>% 
-   summarize({summary column name} = {operator(source column)}) 
+   summarize({summary column name} = {function(source column)}) 
 ```
 </div>
 
diff --git a/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd b/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd
index 1fe8cdb91..c88f3e044 100644
--- a/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd
+++ b/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd
@@ -26,7 +26,7 @@ bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.cs
 
 ### 1.1 
 
-How many bike "lanes" are currently in Baltimore?  You can assume each observation/row is a different bike "lane".  (hint: how do you get the number of rows of a data set? You can use `dim()` or `nrow()` or another function).
+How many streets with designated bike lanes are currently in Baltimore? You can assume each observation/row is a different street with one or more bike lanes. (Hint: how do you get the number of rows of a data set? You can use `dim()` or `nrow()` or another function).
 
 ```{r 1.1response}
 
@@ -47,7 +47,7 @@ Summarize the data to get the `max` of `length` using the `summarize` function.
 ```
 # General format 
 DATA_TIBBLE %>% 
-    summarize(SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN))
+    summarize(SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN))
 ```
 
 ```{r 1.3response}
@@ -61,8 +61,8 @@ Modify your code from 1.3 to add the `min` of `length` using the `summarize` fun
 ```
 # General format 
 DATA_TIBBLE %>% 
-    summarize(SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN),
-              SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN)
+    summarize(SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN),
+              SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN)
     )
 ```
 
@@ -80,8 +80,8 @@ Summarize the `bike` data to get the mean of `length` and `dateInstalled`. Make
 ```
 # General format 
 DATA_TIBBLE %>% 
-    summarize(SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN, na.rm = TRUE),
-              SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN, na.rm = TRUE)
+    summarize(SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN, na.rm = TRUE),
+              SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN, na.rm = TRUE)
     )
 ```
 
diff --git a/modules/Data_Summarization/lab/Data_Summarization_Lab_Key.Rmd b/modules/Data_Summarization/lab/Data_Summarization_Lab_Key.Rmd
index 3f337d869..0fd5b3022 100644
--- a/modules/Data_Summarization/lab/Data_Summarization_Lab_Key.Rmd
+++ b/modules/Data_Summarization/lab/Data_Summarization_Lab_Key.Rmd
@@ -26,7 +26,7 @@ bike <- read_csv(file = "http://jhudatascience.org/intro_to_r/data/Bike_Lanes.cs
 
 ### 1.1 
 
-How many bike "lanes" are currently in Baltimore?  You can assume each observation/row is a different bike "lane".  (hint: how do you get the number of rows of a data set? You can use `dim()` or `nrow()` or another function).
+How many streets with designated bike lanes are currently in Baltimore? You can assume each observation/row is a different street with one or more bike lanes. (Hint: how do you get the number of rows of a data set? You can use `dim()` or `nrow()` or another function).
 
 ```{r 1.1response}
 nrow(bike)
@@ -54,7 +54,7 @@ Summarize the data to get the `max` of `length` using the `summarize` function.
 ```
 # General format 
 DATA_TIBBLE %>% 
-    summarize(SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN))
+    summarize(SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN))
 ```
 
 ```{r 1.3response}
@@ -70,8 +70,8 @@ Modify your code from 1.3 to add the `min` of `length` using the `summarize` fun
 ```
 # General format 
 DATA_TIBBLE %>% 
-    summarize(SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN),
-              SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN)
+    summarize(SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN),
+              SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN)
     )
 ```
 
@@ -92,8 +92,8 @@ Summarize the `bike` data to get the mean of `length` and `dateInstalled`. Make
 ```
 # General format 
 DATA_TIBBLE %>% 
-    summarize(SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN, na.rm = TRUE),
-              SUMMARY_COLUMN_NAME = OPERATOR(SOURCE_COLUMN, na.rm = TRUE)
+    summarize(SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN, na.rm = TRUE),
+              SUMMARY_COLUMN_NAME = FUNCTION(SOURCE_COLUMN, na.rm = TRUE)
     )
 ```
 

From 48d8d4b3d45c368a273f67976995426ff32897d0 Mon Sep 17 00:00:00 2001
From: clifmckee <clifton.mckee@gmail.com>
Date: Tue, 7 Jan 2025 13:29:49 -0500
Subject: [PATCH 2/5] Update Data_Summarization.Rmd

---
 .../Data_Summarization/Data_Summarization.Rmd | 127 +++++++++---------
 1 file changed, 66 insertions(+), 61 deletions(-)

diff --git a/modules/Data_Summarization/Data_Summarization.Rmd b/modules/Data_Summarization/Data_Summarization.Rmd
index 8152a08cd..424d2d0ac 100644
--- a/modules/Data_Summarization/Data_Summarization.Rmd
+++ b/modules/Data_Summarization/Data_Summarization.Rmd
@@ -99,7 +99,9 @@ sum(z)
 
 ## Some examples
 
-We can use the `mtcars` built-in dataset.  The `head` command displays the first rows of an object:
+We can use the `mtcars` built-in dataset. "The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973-74 models)."
+
+The `head` command displays the first rows of an object:
 
 ```{r}
 head(mtcars)
@@ -112,23 +114,10 @@ A nice and readable way to chain together multiple R functions.
 
 Changes `f(x, y)` to `x %>% f(y)`.
 
-```{r eval=FALSE}
-# Going to work
-get_dressed(me,
-            pack_lunch(
-              check_pockets(
-                wallet = TRUE, phone = TRUE, keys = TRUE),
-              items = c("sandwich", "chips", "apple"), lunchbox = TRUE),
-            pants = TRUE, shirt = TRUE, footwear = "sandals")
-
-# Going to work, the tidy way
-me %>%
-  get_dressed(pants = TRUE, shirt = TRUE, footwear = "sandals") %>%
-  pack_lunch(items = c("sandwich", "chips", "apple"), lunchbox = TRUE) %>%
-  check_pockets(wallet = TRUE, phone = TRUE, keys = TRUE)
+```{r, out.width = "50%", echo = FALSE, align = "center"}
+knitr::include_graphics("../../images/lol/morning_1.png")
 ```
 
-
 ## Statistical summarization the "tidy" way
 
 ```{r}
@@ -141,7 +130,7 @@ mtcars %>% pull(wt) %>% quantile(probs = 0.6)
 
 ## Behavior of `pull()` function
 
-`pull()` converts a single data column into a vector. This allows you to run summary functions on these data. Once you have "pulled" the data column out, you don't have to name it again in any piped summary functions.
+`pull()` converts a single data column into a <span style="color:blue">vector</span>. This allows you to run summary functions on these data. Once you have "pulled" the data column out, you don't have to name it again in any piped summary functions.
 
 ```{r}
 cars_wt <- mtcars %>% pull(wt)
@@ -157,18 +146,29 @@ mtcars %>% pull(wt) %>% range(wt) # Incorrect
 mtcars %>% pull(wt) %>% range() # Correct
 ```
 
+## GUT CHECK
+
+What kind of object do we need to run summary functions like `mean()`?
+
+A. A vector of numbers
+
+B. A vector of characters
+
+C. A dataset
 
 # Summarization on tibbles (data frames)
 
-## TB Incidence 
+## TB incidence
 
 Let's read in a `tibble` of values from TB incidence.
 
+"Tuberculosis incidence, all forms (per 100,000 population per year), for the period 1990-2007 across 208 countries/territories."
+
 ```{r}
 tb <- read_csv("https://jhudatascience.org/intro_to_r/data/tb.csv")
 ```
 
-## TB Incidence 
+## TB incidence 
 
 Check out the data:
 
@@ -177,7 +177,7 @@ head(tb)
 ```
 
 
-## TB Incidence 
+## TB incidence 
 
 Check out the data:
 
@@ -291,10 +291,11 @@ summary(tb)
 
 ## Summary & Lab Part 1
 
-- summary stats (`mean()`) work with `pull()`
+- `pull()` creates a *vector*
 - don't forget the `na.rm = TRUE` argument!
 - `summary(x)`: quantile information
 - `summarize`: creates a summary table of columns of interest
+- summary stats (`mean()`) work with vectors or with `summarize()`
 
 🏠 [Class Website](https://jhudatascience.org/intro_to_r/)
 
@@ -306,6 +307,10 @@ summary(tb)
 Here we will be using the Youth Tobacco Survey data:
 http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv
 
+"The YTS was developed to provide states with comprehensive data on both middle school and high school students regarding tobacco use, exposure to environmental tobacco smoke, smoking cessation, school curriculum, minors' ability to purchase or otherwise obtain tobacco products, knowledge and attitudes about tobacco, and familiarity with pro-tobacco and anti-tobacco media messages."
+
+* Check out the data at: https://catalog.data.gov/dataset/youth-tobacco-survey-yts-data
+
 ```{r}
 yts <- read_csv("http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv")
 head(yts)
@@ -324,7 +329,7 @@ yts %>%
 
 ## How many `distinct()` values?
 
-`n_distinct()` tells you the number of unique elements. _Must pull the column first!_
+`n_distinct()` tells you the number of unique elements. It needs a vector so you _must pull the column first!_
 
 ```{r}
 yts %>%
@@ -338,7 +343,7 @@ options(max.print = 1000)
 ```
 
 
-## `dplyr`: `count` 
+## Use `count()` to return row count per category.
 
 Use `count` to return a frequency table of unique elements of a data.frame.
 
@@ -347,31 +352,33 @@ yts %>% count(LocationDesc)
 ```
 
 
-## `dplyr`: `count` 
-
-Multiple columns listed further subdivides the count.
+## Listing multiple columns further subdivides the `count()`
 
 ```{r, message = FALSE}
 yts %>% count(LocationDesc, TopicDesc)
 ```
 
+**Note:** `count()` includes NAs
 
-## `dplyr`: `count` 
-
-Multiple columns listed further subdivides the count.
+## GUT CHECK
 
-```{r, message = FALSE}
-yts %>% count(LocationDesc, TopicDesc)
-```
+The `count()` function can help us tally:
 
-<br>
+A. Sample size
 
-**Note:** `count()` includes NAs
+B. Rows per each category
 
+C. How many categories
 
 # Grouping
 
-## Perform Operations By Groups: dplyr
+## Goal 
+
+We want to find the average frequency that youth use tobacco products in the dataset.
+
+_How do we do this?_
+
+## Perform operations by groups: dplyr
 
 `group_by` allows you group the data set by variables/columns you specify:
 
@@ -381,7 +388,7 @@ yts
 ```
 
 
-## Perform Operations By Groups: dplyr
+## Perform operations by groups: dplyr
 
 `group_by` allows you group the data set by variables/columns you specify:
 
@@ -400,7 +407,7 @@ yts_grouped %>% summarize(avg_percent = mean(Data_Value, na.rm = TRUE))
 ```
 
 
-## Use the `pipe` to string these together!
+## Do it in one step: use `%>%` to string these together!
 
 Pipe `yts` into `group_by`, then pipe that into `summarize`:
 
@@ -474,8 +481,8 @@ yts %>%
 `count()` and `n()` can give very similar information.
 
 ```{r}
-mtcars %>% count(cyl)
-mtcars %>% group_by(cyl) %>% summarize(n()) # n() typically used with summarize
+yts %>% count(YEAR)
+yts %>% group_by(YEAR) %>% summarize(n = n()) # n() typically used with summarize
 ```
 
 
@@ -487,7 +494,6 @@ mtcars %>% group_by(cyl) %>% summarize(n()) # n() typically used with summarize
 These functions require a column as a vector using `pull()`.
 
 ```{r, message = FALSE}
-yts <- read_csv("http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv")
 yts_loc <- yts %>% pull(LocationDesc) # pull() to make a vector
 yts_loc %>% unique() # similar to distinct()
 ```
@@ -500,27 +506,6 @@ These functions require a column as a vector using `pull()`.
 yts_loc %>% unique() %>% length() # similar to n_distinct()
 ```
 
-## * New! * Many dplyr functions now have a `.by=` argument
-
-Pipe `yts` into `group_by`, then pipe that into `summarize`:
-
-```{r eval = FALSE}
-yts %>%
-  group_by(Response) %>%
-  summarize(avg_percent = mean(Data_Value, na.rm = TRUE),
-            max_percent = max(Data_Value, na.rm = TRUE))
-```
-
-is the same as..
-
-```{r eval = FALSE}
-yts %>%
-  summarize(avg_percent = mean(Data_Value, na.rm = TRUE),
-            max_percent = max(Data_Value, na.rm = TRUE),
-            .by = Response)
-```
-
-
 ## `summary()` vs. `summarize()`
 
 * `summary()` (base R) gives statistics table on a dataset. 
@@ -592,3 +577,23 @@ tb %>%
 tb %>%
   summarize(across(starts_with("year"), ~mean(.x, na.rm = TRUE)))
 ```
+
+## * New! * Many dplyr functions now have a `.by=` argument
+
+Pipe `yts` into `group_by`, then pipe that into `summarize`:
+
+```{r eval = FALSE}
+yts %>%
+  group_by(Response) %>%
+  summarize(avg_percent = mean(Data_Value, na.rm = TRUE),
+            max_percent = max(Data_Value, na.rm = TRUE))
+```
+
+is the same as...
+
+```{r eval = FALSE}
+yts %>%
+  summarize(avg_percent = mean(Data_Value, na.rm = TRUE),
+            max_percent = max(Data_Value, na.rm = TRUE),
+            .by = Response)
+```

From f0c983d2bc07a59f6e7c2405cc9fbdc293bd916c Mon Sep 17 00:00:00 2001
From: clifmckee <clifton.mckee@gmail.com>
Date: Tue, 7 Jan 2025 18:07:00 -0500
Subject: [PATCH 3/5] Update Data_Summarization.Rmd

---
 .../Data_Summarization/Data_Summarization.Rmd  | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/modules/Data_Summarization/Data_Summarization.Rmd b/modules/Data_Summarization/Data_Summarization.Rmd
index 424d2d0ac..29259778f 100644
--- a/modules/Data_Summarization/Data_Summarization.Rmd
+++ b/modules/Data_Summarization/Data_Summarization.Rmd
@@ -8,11 +8,8 @@ output:
 
 
 ```{r, echo = FALSE, message=FALSE, error = FALSE}
-library(knitr)
-opts_chunk$set(comment = "", message = FALSE)
-suppressWarnings({library(dplyr)})
-library(readr)
-library(tidyverse)
+knitr::opts_chunk$set(comment = "", message = FALSE)
+suppressWarnings(library(tidyverse))
 ```
 
 <style type="text/css">
@@ -193,7 +190,6 @@ Before we go further, let's rename the first column using the `rename()` functio
 In this case, we have to use the backticks (\`) because there are spaces and funky characters in the name. 
 
 ```{r}
-library(dplyr)
 tb <- tb %>%
   rename(country = `TB incidence, all forms (per 100 000 population per year)`)
 ```
@@ -511,6 +507,15 @@ yts_loc %>% unique() %>% length() # similar to n_distinct()
 * `summary()` (base R) gives statistics table on a dataset. 
 * `summarize()` (dplyr) creates a more customized summary tibble/dataframe.
 
+## Functions you might also see
+
+* `rowwise()`: functions will compute results for each row
+* `sum(!is.na())`: # of non-NAs in the data
+* `first()`: first value in the data
+* `last()`: last value in the data
+* `range()`: minimum and maximum of the data
+* `IQR()`: interquartile range of the data
+
 ## Summary & Lab Part 2
 
 - `count(x)`: what unique values do you have? 
@@ -519,6 +524,7 @@ yts_loc %>% unique() %>% length() # similar to n_distinct()
 - `group_by()`: changes all subsequent functions
   - combine with `summarize()` to get statistics per group
   - combine with `mutate()` to add column
+  - `ungroup()` to remove a grouping
 - `summarize()` with `n()` gives the count (NAs included) 
 
 🏠 [Class Website](https://jhudatascience.org/intro_to_r/)

From 6cd0878944b18134a557bf503614b194a971b8a6 Mon Sep 17 00:00:00 2001
From: clifmckee <clifton.mckee@gmail.com>
Date: Tue, 7 Jan 2025 18:19:36 -0500
Subject: [PATCH 4/5] Update Data_Summarization.Rmd

---
 modules/Data_Summarization/Data_Summarization.Rmd | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/Data_Summarization/Data_Summarization.Rmd b/modules/Data_Summarization/Data_Summarization.Rmd
index 29259778f..54885966e 100644
--- a/modules/Data_Summarization/Data_Summarization.Rmd
+++ b/modules/Data_Summarization/Data_Summarization.Rmd
@@ -35,7 +35,7 @@ pre { /* Code block - slightly smaller in this lecture */
 
 https://raw.githubusercontent.com/rstudio/cheatsheets/main/data-transformation.pdf
 
-```{r, fig.alt="A preview of the Data transformation cheatsheet produced by RStudio.", out.width = "80%", echo = FALSE, align = "center"}
+```{r, fig.alt="A preview of the Data transformation cheatsheet produced by RStudio.", out.width = "80%", echo = FALSE, fig.align = "center"}
 knitr::include_graphics("images/Manip_cheatsheet.png")
 ```
 
@@ -111,7 +111,7 @@ A nice and readable way to chain together multiple R functions.
 
 Changes `f(x, y)` to `x %>% f(y)`.
 
-```{r, out.width = "50%", echo = FALSE, align = "center"}
+```{r, out.width = "50%", echo = FALSE, fig.align = "center"}
 knitr::include_graphics("../../images/lol/morning_1.png")
 ```
 
@@ -298,7 +298,7 @@ summary(tb)
 💻 [Lab](https://jhudatascience.org/intro_to_r/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd)
 
 
-## Youth Tobacco Survey
+## Youth Tobacco Survey{.codesmall}
 
 Here we will be using the Youth Tobacco Survey data:
 http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv
@@ -477,8 +477,8 @@ yts %>%
 `count()` and `n()` can give very similar information.
 
 ```{r}
-yts %>% count(YEAR)
-yts %>% group_by(YEAR) %>% summarize(n = n()) # n() typically used with summarize
+yts %>% count(YEAR) %>% head(n = 3)
+yts %>% group_by(YEAR) %>% summarize(n = n()) %>% head(n = 3) # n() typically used with summarize
 ```
 
 
@@ -516,7 +516,7 @@ yts_loc %>% unique() %>% length() # similar to n_distinct()
 * `range()`: minimum and maximum of the data
 * `IQR()`: interquartile range of the data
 
-## Summary & Lab Part 2
+## Summary & Lab Part 2{.codesmall}
 
 - `count(x)`: what unique values do you have? 
   - `distinct()`: what are the distinct values?
@@ -531,7 +531,7 @@ yts_loc %>% unique() %>% length() # similar to n_distinct()
 
 💻 [Lab](https://jhudatascience.org/intro_to_r/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd)
 
-```{r, fig.alt="The End", out.width = "50%", echo = FALSE, fig.align='center'}
+```{r, fig.alt="The End", out.width = "25%", echo = FALSE, fig.align='center'}
 knitr::include_graphics(here::here("images/the-end-g23b994289_1280.jpg"))
 ```
 

From 101ba6122374fc573481ab9d0c8c48d1654f7dc4 Mon Sep 17 00:00:00 2001
From: clifmckee <clifton.mckee@gmail.com>
Date: Tue, 7 Jan 2025 18:29:00 -0500
Subject: [PATCH 5/5] Update Data_Summarization.Rmd

---
 modules/Data_Summarization/Data_Summarization.Rmd | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/modules/Data_Summarization/Data_Summarization.Rmd b/modules/Data_Summarization/Data_Summarization.Rmd
index 54885966e..091458002 100644
--- a/modules/Data_Summarization/Data_Summarization.Rmd
+++ b/modules/Data_Summarization/Data_Summarization.Rmd
@@ -298,13 +298,11 @@ summary(tb)
 💻 [Lab](https://jhudatascience.org/intro_to_r/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd)
 
 
-## Youth Tobacco Survey{.codesmall}
+## Youth Tobacco Survey
 
 Here we will be using the Youth Tobacco Survey data:
 http://jhudatascience.org/intro_to_r/data/Youth_Tobacco_Survey_YTS_Data.csv
 
-"The YTS was developed to provide states with comprehensive data on both middle school and high school students regarding tobacco use, exposure to environmental tobacco smoke, smoking cessation, school curriculum, minors' ability to purchase or otherwise obtain tobacco products, knowledge and attitudes about tobacco, and familiarity with pro-tobacco and anti-tobacco media messages."
-
 * Check out the data at: https://catalog.data.gov/dataset/youth-tobacco-survey-yts-data
 
 ```{r}
@@ -482,7 +480,7 @@ yts %>% group_by(YEAR) %>% summarize(n = n()) %>% head(n = 3) # n() typically us
 ```
 
 
-# A few miscellaneous topics .. 
+# A few miscellaneous topics
 
 
 ## Base R functions you might see: `length` and `unique`
@@ -516,22 +514,21 @@ yts_loc %>% unique() %>% length() # similar to n_distinct()
 * `range()`: minimum and maximum of the data
 * `IQR()`: interquartile range of the data
 
-## Summary & Lab Part 2{.codesmall}
+## Summary & Lab Part 2
 
 - `count(x)`: what unique values do you have? 
   - `distinct()`: what are the distinct values?
   - `n_distinct()` with `pull()`: how many distinct values?
-- `group_by()`: changes all subsequent functions
+- `group_by()`: changes subsequent functions (remove with `ungroup()`)
   - combine with `summarize()` to get statistics per group
   - combine with `mutate()` to add column
-  - `ungroup()` to remove a grouping
 - `summarize()` with `n()` gives the count (NAs included) 
 
 🏠 [Class Website](https://jhudatascience.org/intro_to_r/)
 
 💻 [Lab](https://jhudatascience.org/intro_to_r/modules/Data_Summarization/lab/Data_Summarization_Lab.Rmd)
 
-```{r, fig.alt="The End", out.width = "25%", echo = FALSE, fig.align='center'}
+```{r, fig.alt="The End", out.width = "20%", echo = FALSE, fig.align='center'}
 knitr::include_graphics(here::here("images/the-end-g23b994289_1280.jpg"))
 ```