Skip to content

Commit

Permalink
removing all code not related to ibrahimsharaf#14
Browse files Browse the repository at this point in the history
  • Loading branch information
Ayatallah committed Jun 5, 2019
1 parent 49dd931 commit 418320d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 32 deletions.
15 changes: 6 additions & 9 deletions models/classifier_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import inspect
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.externals import joblib
from .model import Model
from .doc2vec_model import doc2VecModel

Expand Down Expand Up @@ -33,17 +32,9 @@ def train_model(self, d2v, training_vectors, training_labels):

def save_model(self, filename):
    """Serialize the trained classifier to *filename* inside ``classifiers_path``.

    Args:
        filename: File name (relative to ``classifiers_path``) that
            ``joblib.dump`` writes ``self.model`` to.
    """
    logging.info("Saving trained classification model")
    target_path = os.path.join(classifiers_path, filename)
    joblib.dump(self.model, target_path)

def load_model(self, filename):
    """Load a previously saved classifier from ``classifiers_path`` into ``self.model``.

    Args:
        filename: File name (relative to ``classifiers_path``) of the
            dumped model.

    ``self.model`` is set to ``None`` when no regular file exists at the
    resolved path, so callers can detect a missing model.
    """
    logging.info("Loading trained classification model")
    model_path = os.path.join(classifiers_path, filename)
    # NOTE: joblib.load unpickles the file; only load model files from a trusted source.
    self.model = joblib.load(model_path) if os.path.isfile(model_path) else None

def test_model(self, d2v, testing_vectors, testing_labels):
logging.info("Classifier testing")
Expand All @@ -52,3 +43,9 @@ def test_model(self, d2v, testing_vectors, testing_labels):
logging.info('Testing predicted classes: {}'.format(np.unique(testing_predictions)))
logging.info('Testing accuracy: {}'.format(accuracy_score(testing_labels, testing_predictions)))
logging.info('Testing F1 score: {}'.format(f1_score(testing_labels, testing_predictions, average='weighted')))

def predict(self, d2v, testing_vectors):
    """Predict classes for the 'Test'-labelled vectors and return them.

    Args:
        d2v: Object passed as the first argument to
            ``doc2VecModel.get_vectors`` to fetch the test vectors.
        testing_vectors: Sized collection of test documents; only its
            length is used here, as the number of vectors to fetch.

    Returns:
        Whatever ``self.model.predict`` produces for the fetched vectors.
        The predictions are also logged at INFO level.
    """
    logging.info("Classifier Predicting")
    # NOTE(review): 300 is presumably the doc2vec vector size -- confirm it
    # matches the size the doc2vec model was trained with.
    test_vectors = doc2VecModel.get_vectors(d2v, len(testing_vectors), 300, 'Test')
    testing_predictions = self.model.predict(test_vectors)
    logging.info(testing_predictions)
    # Hand the predictions back so callers can use them instead of having
    # to scrape them from the log output.
    return testing_predictions
40 changes: 17 additions & 23 deletions text_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import sys, getopt
import os, inspect
import numpy as np
from sklearn.model_selection import train_test_split
from models.doc2vec_model import doc2VecModel
from models.classifier_model import classifierModel
Expand All @@ -10,6 +11,9 @@
# Filesystem locations, all resolved relative to the directory holding this file.
# File name of the currently executing frame, i.e. this module.
base_file_path = inspect.getframeinfo(inspect.currentframe()).filename
# Directory containing this module (absolute path).
project_dir_path = os.path.dirname(os.path.abspath(base_file_path))
# 'data' sub-directory of the project directory.
data_path = os.path.join(project_dir_path, 'data')
# Default file paths under 'classifiers' for the classifier and doc2vec models.
default_classifier = os.path.join(project_dir_path, 'classifiers','joblib_model.pkl')
default_doc2vec= os.path.join(project_dir_path, 'classifiers','d2v.model')
# Default dataset CSV inside the data directory.
default_dataset= os.path.join(data_path, 'dataset.csv')

class TextClassifier():

Expand All @@ -31,48 +35,38 @@ def prepare_all_data(self):
all_data = x_train + x_test
return x_train, x_test, y_train, y_test, all_data

# def prepare_test_data(self):
# x_test = doc2VecBuilder.label_sentences(self.dataset.review, 'Test')
# y_test = self.dataset.sentiment
# return x_test, y_test
def prepare_test_data(self, sentence):
    """Label *sentence* with the 'Test' tag via ``doc2VecModel.label_sentences``.

    Args:
        sentence: Input forwarded unchanged to ``label_sentences``.

    Returns:
        The result of ``doc2VecModel.label_sentences(sentence, 'Test')``.
    """
    return doc2VecModel.label_sentences(sentence, 'Test')

def train_classifier(self, d2v_file, classifier_file):
def train_classifier(self):
x_train, x_test, y_train, y_test, all_data = self.prepare_all_data()
self.d2v.initialize_model(all_data)
self.d2v.train_model()
self.d2v.save_model(d2v_file)
self.classifier.initialize_model()
self.classifier.train_model(self.d2v, x_train, y_train)
self.classifier.save_model(classifier_file)
self.classifier.test_model(self.d2v, x_test, y_test)
return self.d2v, self.classifier

def test_classifier(self, d2v_file, classifier_file):
# x_test, y_test = self.prepare_test_data()
def test_classifier(self):
x_train, x_test, y_train, y_test, all_data = self.prepare_all_data()
self.d2v.load_model(d2v_file)
self.classifier.load_model(classifier_file)
if (self.d2v.model is None or self.classifier.model is None):
logging.info("No Trained Models Found, Train First or Use Correct Model Names")
else:
self.classifier.test_model(self.d2v, x_test, y_test)


def main(argv):
if(len(argv)==4):
mode = argv[0]
dataset_file = argv[1]
d2v_file = argv[2]
classifier_file = argv[3]
if(len(argv)==1):
dataset_file = argv[0]

tc = TextClassifier()
tc.read_data(dataset_file)
if mode.lower() == 'test':
tc.test_classifier(d2v_file, classifier_file)
else:
tc.train_classifier(d2v_file, classifier_file)
else:
print ('python text_classifier.py <mode> <dataset_file> <doc2vec_file> <classifier_file>')
tc.test_classifier()
tc.train_classifier()

else:
print('Please use any of the following Commands to use text_classifier for training/testing/predicting:')
print ('To Run: python text_classifier.py <dataset_file>')

# Script entry point: pass the command-line arguments (without the program name) to main().
if __name__ == "__main__":
main(sys.argv[1:])

0 comments on commit 418320d

Please sign in to comment.