-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUnit 3.Rmd
103 lines (72 loc) · 3.17 KB
/
Unit 3.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
---
title: "Unit 3 backup"
output: html_document
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
# Set the default knitr chunk option echo = TRUE for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
```
# Packages used by this document.
# One-time setup if GGally is not installed yet:
# install.packages("GGally")
library(GGally)
library(tidyverse)
# library(ggplot2)
# library(lubridate)

# Read the FIFA player data from the CSV export.
fifa <- read.csv(
  file = "03-School/DS 6306/Unit3/FIFA Players.csv",
  header = TRUE,
  sep = ","
)

# Restrict the data to Left Forwards ("LF") and Left Midfielders ("LM").
LMLF <- filter(fifa, Position %in% c("LF", "LM"))

# Exploratory checks on Position, kept for reference:
# LMLF$Position = as.factor(as.character(LMLF$Position))
# LMLF<-as.data.frame(LMLF)
# summary(LMLF$Position)
# class(LMLF$Position)

# Pairwise plots of Acceleration and Agility, coloured by Position.
LMLF %>%
  select(Acceleration, Agility, Position) %>%
  ggpairs(aes(color = Position))
# Variant without the Position colouring:
# LMLF %>% select(Acceleration, Agility) %>% ggpairs()

# One data frame per position, split out of the combined LF/LM data.
LM <- filter(LMLF, Position == "LM")
LF <- filter(LMLF, Position == "LF")
# Agility summary for Left Forwards: mean, standard deviation and row count.
MALF <- summarise(
  filter(fifa, Position == "LF"),
  mean = mean(Agility),
  sd = sd(Agility),
  count = n()
)

# The same Agility summary for Left Midfielders.
MALM <- summarise(
  filter(fifa, Position == "LM"),
  mean = mean(Agility),
  sd = sd(Agility),
  count = n()
)

# Pull the Agility column out of each per-position data frame.
LMAgility <- LM[["Agility"]]
LFAgility <- LF[["Agility"]]

# Histogram of Agility for each position (LF first, then LM).
# The vector names are passed as-is because hist() derives its default
# title and x-axis label from the argument expression.
hist(LFAgility, col = "firebrick3")
hist(LMAgility, col = "skyblue2")
############################################################
# Start from the built-in mtcars data, moving the car names out of the row
# names and into an ordinary column.
cars <- rownames_to_column(mtcars)

# Calculated elapsed time: 6.290 * (weight / horsepower)^(1/3).
# wt is multiplied by 1000 before the division (the mtcars documentation
# gives wt in units of 1000 lbs).
Expected <- mutate(cars, expect = 6.290 * (wt * 1000 / hp)^(1 / 3))

# Scatter plot of calculated vs observed elapsed time (qsec) with a linear fit.
ggplot(Expected, aes(x = expect, y = qsec)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Plot of Calculated E.T. vs Observed E.T.",
    x = "Calculated Elapsed Time",
    y = "Observed Elapsed Time"
  )

# Performance ratio = calculated time / observed time. A ratio above 1 means
# the observed time was shorter than calculated (the car ran faster); a ratio
# below 1 means it ran slower.
performance <- mutate(Expected, perfratio = expect / qsec)

# Distribution of the performance ratio.
hist(performance$perfratio, col = "red")
# Bucket each car by its performance ratio (calculated E.T. / observed E.T.):
#   (0, 0.97]     observed time longer than calculated  -> "Slower Performance"
#   (0.97, 1.03]  ratio within 3% of 1                  -> "Expected"
#   (1.03, Inf]   observed time shorter than calculated -> "Faster Performance"
# The top break is Inf rather than a fixed cap so that an unusually large
# ratio is still classified instead of silently becoming NA.
performance <- performance %>%
  mutate(
    perfcar = cut(
      perfratio,
      breaks = c(0, 0.97, 1.03, Inf),
      labels = c("Slower Performance", "Expected", "Faster Performance")
    )
  )

# Calculated vs observed elapsed time, distinguished by performance bucket.
# geom_point()'s default shape has no fill, so the bucket is mapped to colour
# (for the points and fit lines) as well as fill (for the confidence ribbons).
performance %>%
  ggplot(aes(x = expect, y = qsec, color = perfcar, fill = perfcar)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Plot of Calculated E.T. vs Observed E.T.") +
  xlab("Calculated Elapsed Time") +
  ylab("Observed Elapsed Time")

# Number of cylinders within each performance bucket.
performance %>%
  ggplot(aes(x = perfcar, y = cyl)) +
  geom_boxplot(fill = "lightblue", color = "darkred")

# Number of carburetors within each performance bucket.
performance %>%
  ggplot(aes(x = perfcar, y = carb)) +
  geom_boxplot(fill = "lightblue", color = "darkred")
```