-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathbuild.gradle
151 lines (135 loc) · 4.03 KB
/
build.gradle
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/**
 * Holder for the run parameters of the pipeline.
 * The readConfig task assigns these, in this order, from the non-blank,
 * non-comment lines of the config file; every value is kept as a raw String.
 */
class Config {
    String datasetName
    String hdfsPathInp
    String hdfsPathOut
    String localPathOut
    String delimiter
    String minSupport
    String minConfidence
    String maxPasses
    String filterFlag
}
/**
 * Values computed while the build runs rather than read from the config file.
 */
class GlobalParams {
    // First line of the `grep -c` output captured by the countTransactions task.
    String countTxns
}
// Script-wide state shared by the task closures below: filled in by
// readConfig / countTransactions and read by the later pipeline tasks.
Config config = new Config()
GlobalParams params = new GlobalParams()
// Resolve all external dependencies from Maven Central.
repositories.mavenCentral()
// The Java plugin supplies the source sets, the `compile` configuration and the `jar` task used below.
apply(plugin: 'java')
// Hadoop libraries the project compiles against (and that the jar task bundles).
// NOTE(review): hadoop-core 1.2.1 is declared next to the 2.7.3 artifacts; the two
// lines of Hadoop may ship overlapping classes -- confirm that 1.2.1 is really needed.
dependencies {
    compile group: 'org.apache.hadoop', name: 'hadoop-common', version: '2.7.3'
    compile group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '2.7.3'
    compile group: 'org.apache.hadoop', name: 'hadoop-core', version: '1.2.1'
}
// Register src/main as an additional Java source directory for the main source set.
sourceSets {
    main {
        java {
            srcDir 'src/main'
        }
    }
}
// Build a self-contained jar: every archive on the compile configuration is
// unpacked into it, and the manifest names the driver class as entry point.
jar {
    baseName = 'mba'
    manifest {
        attributes('Main-Class': 'com.pranit.mba.MbaDriver')
    }
    from(configurations.compile.collect { dependencyArchive -> zipTree(dependencyArchive) })
}
// Declare the pipeline tasks up front (type and hard dependencies only);
// each one is configured in its own block further down.
tasks.create(name: 'readConfig')
tasks.create(name: 'countTransactions', type: Exec, dependsOn: readConfig)
tasks.create(name: 'deleteInpDirHdfs', type: Exec)
tasks.create(name: 'copyInputToHdfs', type: Exec, dependsOn: deleteInpDirHdfs)
tasks.create(name: 'deleteOutDirHdfs', type: Exec)
tasks.create(name: 'runJar', type: Exec, dependsOn: deleteOutDirHdfs)
tasks.create(name: 'deleteOutDirLocal', type: Delete)
tasks.create(name: 'copyOutputFromHdfs', type: Exec, dependsOn: deleteOutDirLocal)
// Umbrella task: depends on every pipeline stage and pins their relative order.
task run {
    group = 'Run tasks'
    description = 'Automates the entire process of running this project.'

    // Stages listed in the order they have to execute.
    def stages = [
        readConfig,
        countTransactions,
        deleteInpDirHdfs,
        copyInputToHdfs,
        deleteOutDirHdfs,
        runJar,
        deleteOutDirLocal,
        copyOutputFromHdfs
    ]
    dependsOn = stages

    // Each stage must run after the one listed immediately before it.
    for (int idx = 1; idx < stages.size(); idx++) {
        stages[idx].mustRunAfter stages[idx - 1]
    }
}
// Loads the run parameters from the `config` file in the project directory.
// Blank lines and lines whose first non-blank character is '#' are skipped;
// the remaining lines are assigned, in order, to the fields of `config`.
// The build fails when the file is missing or holds too few values, so later
// tasks never run with null parameters.
readConfig {
    outputs.upToDateWhen { false }
    doFirst {
        // Resolve against the project directory rather than the JVM working directory.
        File conf = project.file('config')
        if (!conf.exists()) {
            throw new GradleException("File does not exist! Create a config file first (expected at ${conf})")
        }
        logger.lifecycle("\nConfig File exists. Reading parameters from the file\n")

        def values = []
        conf.eachLine { line ->
            def entry = line.trim()
            // Test the trimmed text so that indented comments are skipped as well.
            if (entry && !entry.startsWith("#")) {
                values.add(entry)
            }
        }

        // Field names in the order their values must appear in the file.
        def fields = ['datasetName', 'hdfsPathInp', 'hdfsPathOut', 'localPathOut', 'delimiter',
                      'minSupport', 'minConfidence', 'maxPasses', 'filterFlag']
        if (values.size() < fields.size()) {
            throw new GradleException("Config file ${conf} must define ${fields.size()} values " +
                    "(${fields.join(', ')}) but only ${values.size()} were found")
        }
        fields.eachWithIndex { field, idx ->
            config.setProperty(field, values[idx])
        }
    }
}
// Counts the lines of the dataset with `grep -c` and stores the result in
// params.countTxns. The command is assembled in doFirst because the dataset
// name is only assigned when readConfig executes.
countTransactions {
    outputs.upToDateWhen { false }
    doFirst {
        logger.lifecycle("\nCounting total number of transactions in the dataset\n")
        // Capture stdout so that doLast can read the count.
        standardOutput = new ByteArrayOutputStream()
        commandLine 'grep', '-c', '.*', "./dataset/${config.datasetName}"
    }
    doLast {
        def outputLines = standardOutput.toString().tokenize('\n')
        params.countTxns = outputLines[0]
    }
}
// Removes the HDFS input path recursively; the exit value of the command is ignored.
deleteInpDirHdfs {
    outputs.upToDateWhen { false }
    doFirst {
        logger.lifecycle("\nDeleting input directory in HDFS\n")
        ignoreExitValue = true
        commandLine 'hdfs', 'dfs', '-rm', '-r', "${config.hdfsPathInp}"
    }
}
// Uploads the dataset file from ./dataset to the configured HDFS input path.
copyInputToHdfs {
    outputs.upToDateWhen { false }
    doFirst {
        logger.lifecycle("\nCopying dataset from local File System to HDFS\n")
        def localDataset = "./dataset/${config.datasetName}"
        commandLine 'hdfs', 'dfs', '-put', localDataset, "${config.hdfsPathInp}"
    }
}
// Removes the HDFS output path recursively; the exit value of the command is ignored.
deleteOutDirHdfs {
    outputs.upToDateWhen { false }
    doFirst {
        logger.lifecycle("\nDeleting output directory in HDFS\n")
        ignoreExitValue = true
        commandLine 'hdfs', 'dfs', '-rm', '-r', "${config.hdfsPathOut}"
    }
}
// Launches the packaged job through `hadoop jar`.
// Program arguments, in order: HDFS input path, HDFS output path, minimum
// support, minimum confidence, transaction count, delimiter, maximum passes,
// filter flag.
runJar {
    // The command executes build/libs/mba.jar, so make sure the jar task has
    // produced it first; otherwise a clean checkout fails with a missing file.
    dependsOn jar
    doFirst {
        logger.lifecycle("\nRunning the MapReduce program in HDFS by executing the jar file\n")
        executable 'hadoop'
        args 'jar', './build/libs/mba.jar', "$config.hdfsPathInp", "$config.hdfsPathOut", "$config.minSupport", "$config.minConfidence", "$params.countTxns", "$config.delimiter", "$config.maxPasses", "$config.filterFlag"
    }
}
// Deletes the local mba_output directory under the configured local output path.
// The target is registered in doFirst because the path is only assigned when
// readConfig executes.
deleteOutDirLocal {
    outputs.upToDateWhen { false }
    doFirst {
        logger.lifecycle("\nDeleting output directory in Local file system\n")
        def localOutputDir = "${config.localPathOut}/mba_output"
        delete localOutputDir
    }
}
// Downloads the HDFS output path into mba_output under the configured local output path.
copyOutputFromHdfs {
    outputs.upToDateWhen { false }
    doFirst {
        logger.lifecycle("\nCopying output directory from HDFS to local File System\n")
        def localOutputDir = "${config.localPathOut}/mba_output"
        commandLine 'hdfs', 'dfs', '-get', "${config.hdfsPathOut}", localOutputDir
    }
}