diff --git a/models/classifier_model.py b/models/classifier_model.py index 95eb8ba..f057e4d 100644 --- a/models/classifier_model.py +++ b/models/classifier_model.py @@ -4,7 +4,6 @@ import inspect from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score, f1_score -from sklearn.externals import joblib from .model import Model from .doc2vec_model import doc2VecModel @@ -33,17 +32,9 @@ def train_model(self, d2v, training_vectors, training_labels): def save_model(self, filename): logging.info("Saving trained classification model") - filename = os.path.join(classifiers_path, filename) - joblib.dump(self.model, filename) def load_model(self, filename): logging.info("Loading trained classification model") - filename = os.path.join(classifiers_path, filename) - if (os.path.isfile(filename)): - loaded_model = joblib.load(filename) - self.model = loaded_model - else: - self.model = None def test_model(self, d2v, testing_vectors, testing_labels): logging.info("Classifier testing") @@ -52,3 +43,9 @@ def test_model(self, d2v, testing_vectors, testing_labels): logging.info('Testing predicted classes: {}'.format(np.unique(testing_predictions))) logging.info('Testing accuracy: {}'.format(accuracy_score(testing_labels, testing_predictions))) logging.info('Testing F1 score: {}'.format(f1_score(testing_labels, testing_predictions, average='weighted'))) + + def predict(self, d2v, testing_vectors): + logging.info("Classifier Predicting") + test_vectors = doc2VecModel.get_vectors(d2v, len(testing_vectors), 300, 'Test') + testing_predictions = self.model.predict(test_vectors) + logging.info(testing_predictions) \ No newline at end of file diff --git a/text_classifier.py b/text_classifier.py index d934b41..0f09fca 100644 --- a/text_classifier.py +++ b/text_classifier.py @@ -2,6 +2,7 @@ import logging import sys, getopt import os, inspect +import numpy as np from sklearn.model_selection import train_test_split from models.doc2vec_model import doc2VecModel from models.classifier_model import classifierModel @@ -10,6 +11,9 @@ base_file_path = inspect.getframeinfo(inspect.currentframe()).filename project_dir_path = os.path.dirname(os.path.abspath(base_file_path)) data_path = os.path.join(project_dir_path, 'data') +default_classifier = os.path.join(project_dir_path, 'classifiers','joblib_model.pkl') +default_doc2vec= os.path.join(project_dir_path, 'classifiers','d2v.model') +default_dataset= os.path.join(data_path, 'dataset.csv') class TextClassifier(): @@ -31,48 +35,38 @@ def prepare_all_data(self): all_data = x_train + x_test return x_train, x_test, y_train, y_test, all_data - # def prepare_test_data(self): - # x_test = doc2VecBuilder.label_sentences(self.dataset.review, 'Test') - # y_test = self.dataset.sentiment - # return x_test, y_test + def prepare_test_data(self,sentence): + x_test = doc2VecModel.label_sentences(sentence, 'Test') + return x_test - def train_classifier(self, d2v_file, classifier_file): + def train_classifier(self): x_train, x_test, y_train, y_test, all_data = self.prepare_all_data() self.d2v.initialize_model(all_data) self.d2v.train_model() - self.d2v.save_model(d2v_file) self.classifier.initialize_model() self.classifier.train_model(self.d2v, x_train, y_train) - self.classifier.save_model(classifier_file) self.classifier.test_model(self.d2v, x_test, y_test) + return self.d2v, self.classifier - def test_classifier(self, d2v_file, classifier_file): - # x_test, y_test = self.prepare_test_data() + def test_classifier(self): x_train, x_test, y_train, y_test, all_data = self.prepare_all_data() - self.d2v.load_model(d2v_file) - self.classifier.load_model(classifier_file) if (self.d2v.model is None or self.classifier.model is None): logging.info("No Trained Models Found, Train First or Use Correct Model Names") else: self.classifier.test_model(self.d2v, x_test, y_test) - def main(argv): - if(len(argv)==4): - mode = argv[0] - dataset_file = argv[1] - d2v_file = argv[2] - classifier_file = argv[3] + if(len(argv)==1): + dataset_file = argv[0] tc = TextClassifier() tc.read_data(dataset_file) - if mode.lower() == 'test': - tc.test_classifier(d2v_file, classifier_file) - else: - tc.train_classifier(d2v_file, classifier_file) - else: - print ('python text_classifier.py ') + tc.test_classifier() + tc.train_classifier() + else: + print('Please use any of the following Commands to use text_classifier for training/testing/predicting:') + print ('To Run: python text_classifier.py ') if __name__ == "__main__": main(sys.argv[1:])