-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLearn.scala
98 lines (83 loc) · 3.7 KB
/
Learn.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
package exec
import java.io.File
import scala.util.{ Try, Success, Failure }
import org.apache.commons.io.FileUtils
import io.{ CombineVarParam, ReadAlgo, ReadParam, ReadVar }
import rkhs.{ GramOpti, KerEval }
import various.Def
import various.TypeDef._
import exec.learn.KMeans
import exec.learn.OfflineChangePoint
import exec.learn.Regression
import exec.learn.SVM
import exec.learn.TwoSampleTest
object Learn {
/** Name of the file, located in the root folder, that `main` passes to `ReadAlgo.readAndParseFile`. */
val algoFileName = "algo.csv"
/** Name of the file, located in the root folder, that `main` passes to `ReadVar.readAndParseVars`. */
val dataFileName = "learnData.csv"
/** Name of the file, located in the root folder, that `main` passes to `ReadParam.readAndParseParam`. */
val descFileName = "desc.csv"
/** Key that `cacheGram` looks up in the algorithm parameter map to select a `GramOpti`. */
val gramOptiName = "gramOpti"
/**
 * Bundle handed by `callAlgo` to the selected learning algorithm.
 *
 * @param algo       key/value entries read from the algorithm file; `callAlgo` dispatches on the "algo" key
 * @param kerEval    the `KerEval` produced in `main` by `CombineVarParam.generateGlobalKerEval`
 * @param rootFolder the folder that was passed to `main`
 */
case class AlgoParam(algo: Map[String, String], kerEval: KerEval, rootFolder: String)
/**
 * Run the learning phase for one root folder.
 *
 * Reads the algorithm, data and descriptor files found in `rootFolder`, builds the global `KerEval`
 * and dispatches to the requested algorithm through `callAlgo`. Nothing is returned: when any step
 * fails, the string form of the failure is written to `error.txt` inside `rootFolder`; on success
 * this method itself writes nothing.
 *
 * @param rootFolder folder that contains the input files and receives `error.txt` on failure
 */
def main(rootFolder: String): Unit = {
  // Every file handled here sits directly under rootFolder.
  def inRoot(fileName: String): String = rootFolder + Def.folderSep + fileName

  val algoPath = inRoot(algoFileName)
  val dataPath = inRoot(dataFileName) // the KerEval is always built from the learning-phase data
  val descPath = inRoot(descFileName)

  val prepared = for {
    algo <- ReadAlgo.readAndParseFile(algoPath)
    (data, nObs) <- ReadVar.readAndParseVars(dataPath)
    gramOpti <- cacheGram(algo, nObs)
    desc <- ReadParam.readAndParseParam(descPath)
    kerEval <- CombineVarParam.generateGlobalKerEval(nObs, 0, data, desc, gramOpti) // assumes every algorithm needs the complete Gram matrix
  } yield AlgoParam(algo, kerEval, rootFolder)

  // Only a failure leaves a trace on disk; a success needs no further action here.
  prepared.flatMap(callAlgo).failed.foreach { cause =>
    val errorFile = new File(inRoot("error.txt"))
    FileUtils.writeStringToFile(errorFile, cause.toString, "UTF-8")
  }
}
/**
 * Dispatch to the algorithm named by the "algo" entry of `param.algo`.
 *
 * An algorithm never returns a value; it writes its results to the hard drive instead. Problems are
 * reported through the returned `Try`: non-fatal exceptions thrown while dispatching are captured by
 * `Try.flatMap` and surface as a `Failure`.
 *
 * @param param parameters forwarded unchanged to the selected algorithm
 * @return the result of the selected algorithm, or a `Failure` when the "algo" entry is missing or
 *         names an algorithm that has no learn mode
 */
def callAlgo(param: AlgoParam): Try[Unit] =
  algoExistence(param).flatMap { checked =>
    checked.algo("algo") match {
      case "offlinechangepoint" => OfflineChangePoint.main(checked)
      case "twosampletest" => TwoSampleTest.main(checked)
      case "kmeans" => KMeans.main(checked)
      case "regression" => Regression.main(checked)
      case "svm" => SVM.main(checked)
      // Report only the offending name: interpolating the whole AlgoParam would dump the parameter
      // map, the KerEval and the root folder into the error message.
      case unknown => Failure(new Exception(s"Learn mode not available for algorithm $unknown."))
    }
  }
/**
 * Check that the algorithm parameters contain an "algo" entry.
 *
 * @param param parameters to check
 * @return `param` unchanged inside a `Success` when the "algo" key is present, otherwise a
 *         `Failure` whose message names the algorithm file
 */
def algoExistence(param: AlgoParam): Try[AlgoParam] =
  param.algo.get("algo") match {
    case Some(_) => Success(param)
    case None => Failure(new Exception(s"Algorithm name not found in $algoFileName."))
  }
/**
 * Split a parameter string of the form `name(args)` into its name and its raw argument text.
 *
 * The name is a non-empty run of letters and digits. The argument text is whatever the greedy
 * `.*` captures between the parentheses and may be empty, as in `Direct()`. The pattern is searched
 * for inside `str`; it is not required to span the whole string.
 *
 * @param str text to parse
 * @return `Success((name, args))` when the pattern is found, otherwise a `Failure` whose message
 *         quotes `str`
 */
def parseParam(str: String): Try[(String, String)] = {
  val paramPattern = raw"([a-zA-Z0-9]+)\((.*)\)".r
  // findFirstMatchIn returns the first match as an Option, so "no match" is an ordinary value and
  // the groups are read from a real Match rather than from an iterator that was never advanced.
  // The Try still guards the lookup itself (for instance against a null `str`).
  Try(paramPattern.findFirstMatchIn(str)) match {
    case Success(Some(found)) => Success((found.group(1), found.group(2)))
    case _ => Failure(new Exception(str + " is not a valid parameter String")) // a dedicated message is more expressive than the default matching exception
  }
}
/**
 * Build the `GramOpti` requested by the `gramOpti` entry of the algorithm parameters.
 *
 * Recognised values are `Direct()`, `Cache()` and `LowRank(m)`, where `m` is converted with
 * `toIndex`. Parameterless variants still need their empty parentheses.
 *
 * @param param entries read from the algorithm file
 * @param nObs  presumably the number of observations (it comes from `ReadVar.readAndParseVars` in
 *              `main`); not used by the current implementation
 * @return the selected `GramOpti`, or a `Failure` when the entry is missing, cannot be parsed, or
 *         the `LowRank` argument cannot be converted
 */
def cacheGram(param: Map[String, String], nObs: Index): Try[GramOpti] =
  param.get(gramOptiName) match {
    case None =>
      Failure(new Exception(s"$algoFileName must define $gramOptiName"))
    case Some(rawStr) =>
      // parseParam already performs the name(args) split with the same pattern, so it is reused
      // here instead of duplicating the regular expression.
      parseParam(rawStr) match {
        case Success(("Direct", "")) => Success(new GramOpti.Direct)
        case Success(("Cache", "")) => Success(new GramOpti.Cache)
        case Success(("LowRank", m)) => Try(m.toIndex).map(GramOpti.LowRank)
        case _ => Failure(new Exception(s"Could not parse $gramOptiName entry: $rawStr. If there are no parameters, do not forget the trailing empty parenthesis, as in Direct(), for example."))
      }
  }
}