From e7c9576dc2200e53aa9e19b7bc97a984800f95ed Mon Sep 17 00:00:00 2001 From: Aya Amr Date: Wed, 15 May 2019 00:32:25 +0200 Subject: [PATCH] #14 Relevant file paths --- models/classifier_builder.py | 14 ++++++++++---- models/doc2vec_builder.py | 14 ++++++++++---- models/model_builder.py | 2 ++ text_classifier.py | 17 +++++++++++------ 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/models/classifier_builder.py b/models/classifier_builder.py index 71d75a8..607bb6f 100644 --- a/models/classifier_builder.py +++ b/models/classifier_builder.py @@ -1,6 +1,7 @@ import logging import numpy as np import os +import inspect from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score, f1_score from sklearn.externals import joblib @@ -8,7 +9,10 @@ from .doc2vec_builder import doc2VecBuilder logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - +base_file_path = inspect.getframeinfo(inspect.currentframe()).filename +base_path = os.path.dirname(os.path.abspath(base_file_path)) +project_dir_path = os.path.dirname(os.path.abspath(base_path)) +classifiers_path = os.path.join(project_dir_path, 'classifiers') class classifierBuilder(ModelBuilder): def __init__(self): @@ -29,12 +33,14 @@ def train_model(self, d2v, training_vectors, training_labels): def save_model(self, filename): logging.info("Saving trained classification model") - joblib.dump(self.model, "./classifiers/" + filename) + filename = os.path.join(classifiers_path, filename) + joblib.dump(self.model, filename) def load_model(self, filename): logging.info("Loading trained classification model") - if (os.path.isfile('./classifiers/' + filename)): - loaded_model = joblib.load('./classifiers/' + filename) + filename = os.path.join(classifiers_path, filename) + if (os.path.isfile(filename)): + loaded_model = joblib.load(filename) self.model = loaded_model else: self.model = None diff --git a/models/doc2vec_builder.py b/models/doc2vec_builder.py index 33450f1..7e1416a 100644 --- a/models/doc2vec_builder.py +++ b/models/doc2vec_builder.py @@ -1,6 +1,7 @@ import logging import random import os +import inspect import numpy as np from gensim.models import doc2vec from gensim.models.doc2vec import Doc2Vec @@ -8,7 +9,10 @@ from .model_builder import ModelBuilder logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - +base_file_path = inspect.getframeinfo(inspect.currentframe()).filename +base_path = os.path.dirname(os.path.abspath(base_file_path)) +project_dir_path = os.path.dirname(os.path.abspath(base_path)) +classifiers_path = os.path.join(project_dir_path, 'classifiers') class doc2VecBuilder(ModelBuilder): @@ -45,12 +49,14 @@ def train_model(self): def save_model(self, filename): logging.info("Saving trained Doc2Vec model") - self.model.save("./classifiers/" + filename) + filename = os.path.join(classifiers_path, filename) + self.model.save(filename) def load_model(self, filename): logging.info("Loading trained Doc2Vec model") - if (os.path.isfile('./classifiers/' + filename)): - d2v = Doc2Vec.load("./classifiers/" + filename) + filename = os.path.join(classifiers_path, filename) + if (os.path.isfile(filename)): + d2v = Doc2Vec.load(filename) self.model = d2v else: self.model = None diff --git a/models/model_builder.py b/models/model_builder.py index 0d4b1e4..187b593 100644 --- a/models/model_builder.py +++ b/models/model_builder.py @@ -22,3 +22,5 @@ def save_model(self): @abstractmethod def load_model(self): pass + + diff --git a/text_classifier.py b/text_classifier.py index 05ae51d..fa1a5b4 100644 --- a/text_classifier.py +++ b/text_classifier.py @@ -1,12 +1,16 @@ import pandas as pd import logging import sys, getopt +import os, inspect from sklearn.model_selection import train_test_split from models.doc2vec_builder import doc2VecBuilder from models.classifier_builder import classifierBuilder logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - +base_file_path = inspect.getframeinfo(inspect.currentframe()).filename +base_path = os.path.dirname(os.path.abspath(base_file_path)) +project_dir_path = os.path.dirname(os.path.abspath(base_path)) +data_path = os.path.join(project_dir_path, 'data') class TextClassifier(): @@ -16,8 +20,9 @@ def __init__(self): self.classifier = classifierBuilder() self.dataset = None - def read_data(self, path): - self.dataset = pd.read_csv(path, header=0, delimiter="\t") + def read_data(self, filename): + filename = os.path.join(data_path, filename) + self.dataset = pd.read_csv(filename, header=0, delimiter="\t") def prepare_all_data(self): x_train, x_test, y_train, y_test = train_test_split(self.dataset.review, self.dataset.sentiment, random_state=0, @@ -53,9 +58,9 @@ def test_classifier(self, d2v_file, classifier_file): self.classifier.test_model(self.d2v, x_test, y_test) -def run(mode, d2v_file, classifier_file): +def run(mode,dataset_file, d2v_file, classifier_file): tc = TextClassifier() - tc.read_data('./data/dataset.csv') + tc.read_data(dataset_file) if mode == 'Test': tc.test_classifier(d2v_file, classifier_file) else: @@ -63,5 +68,5 @@ def run(mode, d2v_file, classifier_file): if __name__ == "__main__": - run("Train", "d2v.model", "joblib_model.pkl") + run("Train","dataset.csv", "d2v.model", "joblib_model.pkl") # run("Test", "d2v.model", "joblib_model.pkl")