-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
81 lines (58 loc) · 2.53 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Marmara University, Computer Engineering Department
# Natural Language Processing, Machine Learning Project
# Burak Aybar 150112001 & Farid Yagubbayli 150113901
import os.path
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.svm.classes import LinearSVC
from batch import MultinomialNB_tester, Logit_tester, LinearSVC_tester
from util import output_to_file, file_to_features_labels, trainer, tester
import gc
# Input data files (relative to the working directory)
FILE_TRAIN = "train.txt"
FILE_TEST = "test.txt"

# Candidate feature sets, tried one after another by the execution functions
# below. Each set is a list of integer pairs that is handed as-is to the
# feature extractor / classifier tester.
# NOTE(review): the pairs look like (relative token offset, column index) --
# confirm against util.file_to_features_labels.
featureSets = [
    # Set #0
    [
        (-1, 0), (-1, 1),
        (0, 0), (0, 1),
    ],
    # Set #1
    [
        (-2, 0), (-2, 1),
        (-1, 0), (-1, 1),
        (0, 0), (0, 1),
    ],
    # Set #2
    [
        (-1, 1),
        (0, 1),
    ],
    # Set #3
    [
        (-2, 1),
        (-1, 1),
    ],
    # Set #4
    [
        (-2, 1),
        (-1, 1),
        (0, 0), (0, 1),
    ],
]
# Plain sequential run: no threads, classifiers built with default parameters
def ordinary_execution():
    """Evaluate every classifier on every feature set and print the results.

    For each (classifier, feature set) pair the train and test files are
    re-read with that feature set, the pair is handed to ``trainer`` (which
    returns a vectorizer and a classifier) and then to ``tester``; whatever
    ``tester`` returns is printed to stdout.
    """
    classifiers = (MultinomialNB(), LogisticRegression(), LinearSVC())

    for model in classifiers:
        print("Classifier: " + str(model))

        for set_index, feature_set in enumerate(featureSets):
            print("Feature Set #" + str(set_index))

            # Release whatever the previous round left behind before loading
            # the next pair of data sets.
            gc.collect()

            # Load the data sets for this feature set
            x_train, y_train = file_to_features_labels(FILE_TRAIN, feature_set)
            x_test, y_test = file_to_features_labels(FILE_TEST, feature_set)

            # Train; the classifier returned here is also the one carried
            # into the next feature set's round.
            vectorizer, model = trainer(x_train, y_train, model)

            # Evaluate and report
            outcome = tester(x_test, y_test, vectorizer, model)
            print("Results: " + str(outcome))

            # NOTE(review): the source indentation was lost in this copy; the
            # blank separator is assumed to follow each result -- confirm.
            print("\n")
# Batch run with custom parameters, delegated to a tester from batch.py
def threaded_execution(classifier_tester, classifier_label):
    """Run one classifier tester over every feature set and record the results.

    classifier_tester: callable from batch.py; it is called with a single
        feature set and its return value is treated as the results.
        NOTE(review): any threading / parameter sweep presumably happens
        inside this callable -- it is not visible from here.
    classifier_label: label passed to ``output_to_file`` together with the
        feature set index and the results.
    """
    for set_index, feature_set in enumerate(featureSets):
        # Free memory from the previous round before starting the next one
        gc.collect()
        print("Feature set: #" + str(set_index))

        outcome = classifier_tester(feature_set)

        # Persist this round's results
        output_to_file(classifier_label, set_index, outcome)
if __name__ == "__main__":
    # Remove the output file if one already exists
    results_path = "experiment_results.txt"
    if os.path.exists(results_path):
        os.remove(results_path)

    # Run mode: "ordinary" or "threaded"
    execution_type = "threaded"

    if execution_type == "threaded":
        threaded_execution(MultinomialNB_tester, "Multinomial Naive Bayes")
    elif execution_type == "ordinary":
        ordinary_execution()