Skip to content

Commit

Permalink
ibrahimsharaf#14 Relevant file paths
Browse files Browse the repository at this point in the history
  • Loading branch information
Ayatallah committed May 14, 2019
1 parent eca097e commit e7c9576
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 14 deletions.
14 changes: 10 additions & 4 deletions models/classifier_builder.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import logging
import numpy as np
import os
import inspect
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.externals import joblib
from .model_builder import ModelBuilder
from .doc2vec_builder import doc2VecBuilder

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Anchor the classifiers directory to this module's location on disk so that
# saving/loading models does not depend on the process's working directory.
base_file_path = inspect.getframeinfo(inspect.currentframe()).filename
# Directory that contains this module.
base_path = os.path.dirname(os.path.abspath(base_file_path))
# One level above the module's directory; base_path is already absolute, so
# taking its dirname directly gives the same result as re-normalising first.
project_dir_path = os.path.dirname(base_path)
classifiers_path = os.path.join(project_dir_path, 'classifiers')

class classifierBuilder(ModelBuilder):
def __init__(self):
Expand All @@ -29,12 +33,14 @@ def train_model(self, d2v, training_vectors, training_labels):

def save_model(self, filename):
    """Persist the trained classifier inside the classifiers directory.

    Args:
        filename: Name of the file to write; it is joined onto the
            module-level ``classifiers_path``.

    The model is written exactly once, to the module-anchored path. The
    earlier write to ``"./classifiers/" + filename`` depended on the
    current working directory and has been dropped.
    NOTE(review): the target directory is not created here; presumably it
    already exists in the project checkout -- confirm.
    """
    logging.info("Saving trained classification model")
    target = os.path.join(classifiers_path, filename)
    joblib.dump(self.model, target)

def load_model(self, filename):
    """Load a previously saved classifier from the classifiers directory.

    Args:
        filename: Name of the file to read; it is joined onto the
            module-level ``classifiers_path``.

    Sets ``self.model`` to the loaded object, or to ``None`` when no such
    file exists. Only the module-anchored path is consulted; the earlier
    check against ``'./classifiers/' + filename`` depended on the current
    working directory and has been dropped.
    NOTE: joblib files are pickle-based, so only load files you trust.
    """
    logging.info("Loading trained classification model")
    source = os.path.join(classifiers_path, filename)
    if os.path.isfile(source):
        self.model = joblib.load(source)
    else:
        self.model = None
Expand Down
14 changes: 10 additions & 4 deletions models/doc2vec_builder.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import logging
import random
import os
import inspect
import numpy as np
from gensim.models import doc2vec
from gensim.models.doc2vec import Doc2Vec

from .model_builder import ModelBuilder

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Locate the classifiers directory relative to this source file instead of
# the current working directory, so Doc2Vec models resolve from any cwd.
base_file_path = inspect.getframeinfo(inspect.currentframe()).filename
# Directory that contains this module.
base_path = os.path.dirname(os.path.abspath(base_file_path))
# Parent of the module's directory; base_path is already an absolute,
# normalised path, so no second abspath() call is needed.
project_dir_path = os.path.dirname(base_path)
classifiers_path = os.path.join(project_dir_path, 'classifiers')

class doc2VecBuilder(ModelBuilder):

Expand Down Expand Up @@ -45,12 +49,14 @@ def train_model(self):

def save_model(self, filename):
    """Persist the trained Doc2Vec model inside the classifiers directory.

    Args:
        filename: Name of the file to write; it is joined onto the
            module-level ``classifiers_path``.

    The model is saved exactly once, to the module-anchored path. The
    earlier save to ``"./classifiers/" + filename`` depended on the current
    working directory and has been dropped.
    NOTE(review): the target directory is not created here; presumably it
    already exists in the project checkout -- confirm.
    """
    logging.info("Saving trained Doc2Vec model")
    target = os.path.join(classifiers_path, filename)
    self.model.save(target)

def load_model(self, filename):
    """Load a previously saved Doc2Vec model from the classifiers directory.

    Args:
        filename: Name of the file to read; it is joined onto the
            module-level ``classifiers_path``.

    Sets ``self.model`` to the loaded ``Doc2Vec`` instance, or to ``None``
    when no such file exists. Only the module-anchored path is consulted;
    the earlier check against ``'./classifiers/' + filename`` depended on
    the current working directory and has been dropped.
    """
    logging.info("Loading trained Doc2Vec model")
    source = os.path.join(classifiers_path, filename)
    if os.path.isfile(source):
        self.model = Doc2Vec.load(source)
    else:
        self.model = None
Expand Down
2 changes: 2 additions & 0 deletions models/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ def save_model(self):
@abstractmethod
def load_model(self):
    """Restore a previously saved model; concrete builders must override this."""


17 changes: 11 additions & 6 deletions text_classifier.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import pandas as pd
import logging
import sys, getopt
import os, inspect
from sklearn.model_selection import train_test_split
from models.doc2vec_builder import doc2VecBuilder
from models.classifier_builder import classifierBuilder

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Resolve the data directory relative to this script rather than the current
# working directory.
base_file_path = inspect.getframeinfo(inspect.currentframe()).filename
# Directory that contains this script.
base_path = os.path.dirname(os.path.abspath(base_file_path))
# This script imports `models` as a sibling package, so its own directory is
# the project root. Going up one more level (as the modules inside `models/`
# must) would place `data` outside the project.
# NOTE(review): assumes `data/` lives next to this script -- confirm.
project_dir_path = base_path
data_path = os.path.join(project_dir_path, 'data')

class TextClassifier():

Expand All @@ -16,8 +20,9 @@ def __init__(self):
self.classifier = classifierBuilder()
self.dataset = None

def read_data(self, filename):
    """Load the tab-separated dataset into ``self.dataset``.

    Args:
        filename: Name of the dataset file; it is joined onto the
            module-level ``data_path``. Because ``os.path.join`` discards
            earlier components when given an absolute path, an absolute
            ``filename`` is used as-is.

    The first row of the file is treated as the header. This is the single
    surviving definition: the older variant that took a caller-supplied
    ``path`` was immediately shadowed by this one and has been removed.
    """
    dataset_file = os.path.join(data_path, filename)
    self.dataset = pd.read_csv(dataset_file, header=0, delimiter="\t")

def prepare_all_data(self):
x_train, x_test, y_train, y_test = train_test_split(self.dataset.review, self.dataset.sentiment, random_state=0,
Expand Down Expand Up @@ -53,15 +58,15 @@ def test_classifier(self, d2v_file, classifier_file):
self.classifier.test_model(self.d2v, x_test, y_test)


def run(mode, dataset_file, d2v_file, classifier_file):
    """Train or test the text classifier on the given dataset.

    Args:
        mode: ``'Test'`` evaluates previously saved models; any other value
            trains new ones.
        dataset_file: Dataset file name, passed to ``TextClassifier.read_data``.
        d2v_file: File name of the Doc2Vec model.
        classifier_file: File name of the classifier model.

    The dataset is read once, from ``dataset_file``; the stale hard-coded
    read of ``'./data/dataset.csv'`` and the body-less three-argument
    signature have been removed.
    """
    tc = TextClassifier()
    tc.read_data(dataset_file)
    if mode == 'Test':
        tc.test_classifier(d2v_file, classifier_file)
    else:
        tc.train_classifier(d2v_file, classifier_file)


if __name__ == "__main__":
    # Train by default. run() takes the dataset file name as its second
    # argument, so every call (including the commented-out test run) must
    # pass four arguments.
    run("Train", "dataset.csv", "d2v.model", "joblib_model.pkl")
    # run("Test", "dataset.csv", "d2v.model", "joblib_model.pkl")

0 comments on commit e7c9576

Please sign in to comment.