#######################################
#### Discovery step of the CRE method
#######################################
## Read the dataset
dataset = read.csv("simulated_dataset.csv")
y = dataset[,1] # outcome
z = dataset[,2] # treatment
X = dataset[,3:12] # 10 covariates
## There are 10 covariates (x1, x2, ..., x10).
## The correlation between x1, x2, ..., x10 is 0.1.
## The true propensity score model is log(p/(1-p)) = -1 + x1 - x2 + x3.
## Treatment z is generated from a Bernoulli distribution with probability p.
## The outcome follows a normal distribution.
## There is effect modification by x1, x2, and x3:
## the potential outcome y(0) ~ N(0,1) and y(1) = y(0) + tau, where
##   tau = 2  if x1=0 & x2=0,
##   tau = -2 if x1=1 & x3=1,
##   tau = 0  otherwise.
## (An illustrative sketch of this data-generating process follows below.)
#######################################
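## --- Illustrative sketch of the data-generating process described above ---
## This is not the code that produced simulated_dataset.csv. The sample size and
## binary covariates are assumptions, and the 0.1 correlation between covariates
## is omitted for simplicity; variable names are kept separate from the analysis below.
n.sim = 1000                                              # assumed sample size
X.sim = matrix(rbinom(n.sim * 10, 1, 0.5), n.sim, 10)     # assumed binary covariates x1..x10
p.sim = plogis(-1 + X.sim[, 1] - X.sim[, 2] + X.sim[, 3]) # true propensity score
z.sim = rbinom(n.sim, 1, p.sim)                           # treatment from Bernoulli(p)
y0.sim = rnorm(n.sim)                                     # potential outcome y(0) ~ N(0,1)
tau.sim = ifelse(X.sim[, 1] == 0 & X.sim[, 2] == 0, 2,    # effect modification by x1, x2, x3
                 ifelse(X.sim[, 1] == 1 & X.sim[, 3] == 1, -2, 0))
y.sim = y0.sim + z.sim * tau.sim                          # observed outcome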
source("causal_rulefit_functions.R")
## 1. Rule generation
### 1.1. Obtain an estimate of the individual treatment effect (ITE)
#### Use Bayesian Causal Forests (BCF)
library(bcf)
X = as.matrix(X)
## Estimate the propensity score with logistic regression
propscore.model = glm(z ~ X, family = binomial)
logit.ps = predict(propscore.model)        # linear predictor on the logit scale
est.ps = exp(logit.ps)/(1 + exp(logit.ps)) # estimated propensity scores
## Fit Bayesian Causal Forests; X serves as both the control and moderating covariates
bcf.model = bcf(y, z, X, X, est.ps, nburn = 100, nsim = 1000)
## bcf.model$tau is an nsim-by-n matrix of posterior draws of the unit-level effects;
## their posterior means are used as ITE estimates
tau.est = colMeans(bcf.model$tau)
### 1.2. Fit tree ensembles
### 1.3. Extract decision rules
library(randomForest)
library(gbm)
library(inTrees)
library(xgboost)
S = 20      # tuning parameter passed to the rule-generation functions below (see causal_rulefit_functions.R)
L = 5       # tuning parameter passed to the rule-generation functions below (see causal_rulefit_functions.R)
ntree = 200 # number of trees grown in each ensemble
### Rule extraction
y.temp = tau.est
muy = mean(y.temp)
sdy = sd(y.temp)
yz = (y.temp - muy)/sdy # standardize the estimated treatment effects
rulesf = genrulesRF(X, yz, nt = ntree, S = S, L = L)     # rule set from random forests
rules.gbm = genrulesGBM(X, yz, nt = ntree, S = S, L = L) # rule set from gradient boosting
# combine the extracted rule sets
rulesf = c(rulesf, rules.gbm)
minsup = 0.025 # minimum support threshold used to filter rules
dt = createX.take1(X = X, rules = rulesf, t = minsup)
#dt = createX.disjoint(X = X, rules = rulesf, t = minsup) # alternative rule-matrix construction
Xr = dt[[1]]       # observations-by-rules feature matrix
rulesFin = dt[[2]] # retained rule descriptions
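## Quick sanity check of the rule-generation output (illustrative; assumes, as the
## code below does, that Xr is the observations-by-rules matrix and rulesFin the
## matching rule descriptions).
dim(Xr)        # number of observations x number of retained rules
head(rulesFin) # first few decision rules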
## 2. Rule-regularization
### 2.1. Generate tilde{X}
std.Xr = Xr
mur = apply(Xr, 2, mean)
sdr = apply(Xr, 2, sd)
for(l in 1:dim(Xr)[2]){
  std.Xr[,l] = (Xr[,l] - mur[l])/sdr[l]
}
### 2.2. Penalized regression
#### 2.2.1. LASSO
library(glmnet)
lambda = 10^seq(10, -2, length = 100) # lambda grid for the full solution path
lasso.mod = glmnet(std.Xr, yz, alpha = 1, lambda = lambda, intercept = FALSE)
cv.lasso = cv.glmnet(std.Xr, yz, alpha = 1, intercept = FALSE)
bestlam = cv.lasso$lambda.min
## Extract coefficients at lambda.1se (the largest lambda within one standard error
## of the minimum CV error), which gives a sparser rule set than lambda.min
aa = coef(cv.lasso, s = cv.lasso$lambda.1se)
index.aa = which(aa[-1, 1] != 0) # indices of rules with nonzero coefficients
rule.LASSO = data.frame(rules = rulesFin[index.aa], val = aa[index.aa + 1, 1])
rule.LASSO[order(-rule.LASSO[,2]), ] # selected rules, sorted by coefficient (decreasing)
#### 2.2.2. Stability selection
library(stabs)
## Stability selection with the lasso: keep rules whose selection proportion across
## subsamples reaches the cutoff of 0.8, with the per-family error rate (PFER) bounded by 1
stab.mod = stabsel(std.Xr, yz, fitfun = glmnet.lasso, cutoff = 0.8, PFER = 1,
                   args.fitfun = list(type = "conservative"))
plot(stab.mod, main = "Lasso")
rule.stab = rulesFin[stab.mod$selected] # rules passing the stability threshold
rule.stab
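## Optional: inspect how stable each retained rule is. Illustrative snippet; it
## assumes stab.mod$max holds the maximum selection proportion of each column of
## std.Xr, as returned by stabs::stabsel.
sort(stab.mod$max[stab.mod$selected], decreasing = TRUE)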