-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoct_submitTimePlots.R
99 lines (85 loc) · 4.07 KB
/
oct_submitTimePlots.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
###### Working on the Oct 7 data
# MSSG
# 10-7-2015
## Plot layout: a single row holding two panels side by side
par(mfrow = c(1, 2))

## Data-loading switch. Set readdat to 1 to (re)read the four cleaned CSV
## tables; with 0 the reads are skipped, so mi / mn / mu / muf are not
## assigned here and have to be present in the workspace already.
# readdat <- 1
readdat <- 0
if (readdat == 1) {
  ## Invites table
  mi <- read.csv(
    "/Users/m/mozmed/language_R_AndDataAnalysisCourse/R-lengua-larnin/10-07-15.intakeinvites.cleaned.csv"
  )
  ## Nodes table
  mn <- read.csv(
    "/Users/m/mozmed/language_R_AndDataAnalysisCourse/R-lengua-larnin/10-07-15.nodes.cleaned.csv"
  )
  ## User table
  mu <- read.csv(
    "/Users/m/mozmed/language_R_AndDataAnalysisCourse/R-lengua-larnin/10-07-15.users.cleaned.csv"
  )
  ## Userforms table
  muf <- read.csv(
    "/Users/m/mozmed/language_R_AndDataAnalysisCourse/R-lengua-larnin/10-07-15.userforms.cleaned.csv"
  )
}

#### Select which family of plots the rest of the script draws
## "subtimes" enables the section built from the invites data
## (conversion-rate style information); "none" switches it off.
plottype <- "subtimes"
# plottype <- "none"
if (plottype == 'subtimes'){
############# Join userforms to invites and keep the intake rows
# Builds three tables from muf (userforms) and mi (invites):
#   mufi       - muf merged with mi on muf$userid == mi$rid
#   md         - rows of mufi whose type is 'intake'
#   mdcomplete - rows of mufi whose type is 'intake' AND iscomplete is 1
{
  # Numeric copy of receiver_userid so it can be matched against userid.
  # Going through as.character() first matters when the column is a factor
  # (as.numeric() on a factor returns level codes, not the ids).
  # Expect "NAs introduced by coercion": entries that are not numbers
  # (presumably the NULL markers of unclaimed invites) become NA.
  mi$rid <- as.numeric(as.character(mi$receiver_userid))

  mergeddat <- merge(muf, mi, by.x = "userid", by.y = "rid")
  mufi <- mergeddat

  # Row filters -- the trailing comma selects rows and keeps every column.
  md <- mufi[mufi$type == "intake", ]

  # The mask must be built with the elementwise `&`. The scalar `&&` only
  # inspects the first row (selecting either every row or none, which made
  # mdcomplete come out the same size as md) and is an error for inputs
  # longer than one in R >= 4.3.
  mdcomplete <- mufi[mufi$type == "intake" & mufi$iscomplete == 1, ]

  dim(md) # Rows x columns of the intake table
}
############## Get difs in sent, claim, submission times and put into vars
if (1==0) {
# Turn the sent / claimed / submitted timestamp columns of md into
# date-time objects, keeping the raw and character versions as well.
{
  # Stage 1: the raw columns exactly as stored in md
  senttimes <- md$sent_at
  clmtimes <- md$claimed_at
  subtimes <- md$submitted_at

  # Stage 2: plain character copies (strptime needs text input)
  senttimesStr <- as.character(senttimes)
  clmtimesStr <- as.character(clmtimes)
  subtimesStr <- as.character(subtimes)

  # Stage 3: parse the text. The format string spells out how the text is
  # laid out: month/day/two-digit year, then %H = hour on the 24-hour clock
  # and %M = minutes. Text that does not fit the layout parses to NA.
  senttimesAstime <- strptime(senttimesStr, format = "%m/%d/%y %H:%M")
  clmtimesAstime <- strptime(clmtimesStr, format = "%m/%d/%y %H:%M")
  subtimesAstime <- strptime(subtimesStr, format = "%m/%d/%y %H:%M")
}
# Delay between an invite being sent and the form being submitted, in
# hours, drawn as two histograms: all delays, and those under one hour.
# Leaves hrtimes, firsthrTimes, hrhist and fhrhist in the workspace.
{
  # Ask for hours explicitly. Subtracting two date-times with `-` returns a
  # difftime whose units R chooses automatically (secs, mins, hours or days
  # depending on the data), so rescaling it with fixed constants does not
  # reliably produce hours.
  td <- difftime(subtimesAstime, senttimesAstime, units = "hours")
  hrtimes <- as.numeric(td, units = "hours")

  # Keep only usable, strictly positive delays: NA arises whenever either
  # timestamp is missing or failed to parse.
  hrtimes <- hrtimes[!is.na(hrtimes) & hrtimes > 0]

  totnum <- length(hrtimes)
  histname <- paste("Subm Minus Sent Time: N = ", as.character(totnum))
  hrhist <- hist(hrtimes, main = histname, xlab = "Time Dif (hours)",
                 ylab = "Number", col = "blue")

  # First hour: the subset of delays shorter than one hour
  firsthrTimes <- hrtimes[hrtimes < 1]
  totnum <- length(firsthrTimes)
  histname <- paste("First hr: Subm Minus Sent: N = ", as.character(totnum))
  fhrhist <- hist(firsthrTimes, main = histname, xlab = "Time Dif (hours)",
                  ylab = "Number", col = "green")
}
## Cumulative-percentage versions of the two histograms
# Re-plots hrhist and fhrhist so each bar shows the running percentage of
# observations up to and including that bin.
{
  # plot() on a histogram object with freq = FALSE draws its $density
  # component, so that component is overwritten with the running percentage.
  hrhist$density <- cumsum(hrhist$counts) / sum(hrhist$counts) * 100
  totnum <- length(hrtimes)
  # Titles name the quantity actually held in hrhist / fhrhist
  # (submitted minus sent), not claimed minus sent.
  histname <- paste("Subm Minus Sent Time: N = ", as.character(totnum))
  plot(hrhist, freq = FALSE, main = histname, xlab = "Time Dif (hours)",
       ylab = "Percentage", col = "red")

  ## First hr cume sum
  fhrhist$density <- cumsum(fhrhist$counts) / sum(fhrhist$counts) * 100
  totnum <- length(firsthrTimes)
  histname <- paste("First Hr, Subm Minus Sent: N = ", as.character(totnum))
  plot(fhrhist, freq = FALSE, main = histname, xlab = "Time Dif (hours)",
       ylab = "Percentage", col = "cyan")
}
# Alternative to percentages: show the running raw count on the y-axis.
# Each histogram's $counts is replaced by its cumulative sum and the
# object is re-plotted with default settings.
{
  # All delays
  hrhist$counts <- cumsum(hrhist$counts)
  plot(hrhist)

  # Delays within the first hour
  fhrhist$counts <- cumsum(fhrhist$counts)
  plot(fhrhist)
}
} # End