Refactoring models load/save
HardNorth committed Nov 21, 2023
1 parent 3ee0de7 commit 7a94a40
Showing 6 changed files with 49 additions and 105 deletions.
19 changes: 16 additions & 3 deletions app/machine_learning/models/__init__.py
@@ -40,7 +40,7 @@ def __init__(self, folder: str, tags: str, *, object_saver: ObjectSaver = None,
        else:
            self.object_saver = ObjectSaver({CONFIG_KEY: 'filesystem', 'filesystemDefaultPath': folder})

-    def load_model(self, model_files: list[str]) -> list[Any]:
+    def load_models(self, model_files: list[str]) -> list[Any]:
        result = []
        for file in model_files:
            model = self.object_saver.get_project_object(os.path.join(self.folder, file), using_json=False)
@@ -49,13 +49,26 @@ def load_model(self, model_files: list[str]) -> list[Any]:
            result.append(model)
        return result

-    def get_model_info(self):
+    def save_models(self, data: dict[str, Any] | list[tuple[str, Any]]) -> None:
+        if isinstance(data, dict):
+            items = data.items()
+        else:
+            items = data
+        for file_name, object_to_save in items:
+            self.object_saver.put_project_object(object_to_save, os.path.join(self.folder, file_name),
+                                                 using_json=False)
+
+    def get_model_info(self) -> list[str]:
        folder_name = os.path.basename(self.folder.strip("/").strip("\\")).strip()
        tags = self.tags
        if folder_name:
            tags = [folder_name] + self.tags
        return tags

    @abstractmethod
-    def save_model(self):
+    def load_model(self) -> list[Any]:
+        raise NotImplementedError('"load_model" method is not implemented!')
+
+    @abstractmethod
+    def save_model(self) -> None:
        raise NotImplementedError('"save_model" method is not implemented!')
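
For reference, a minimal sketch (not part of the commit) of how a subclass could wire the new base-class helpers; the TinyModel class and its file name are hypothetical:

from typing import Any

from app.machine_learning.models import MlModel

MODEL_FILES: list[str] = ['tiny_model.pickle']  # hypothetical file name, for illustration only


class TinyModel(MlModel):

    def __init__(self, folder: str):
        super().__init__(folder, 'tiny model')
        self.data = None

    def load_model(self) -> list[Any]:
        # Delegate to the shared helper: one loaded object is returned per entry of MODEL_FILES.
        return self.load_models(MODEL_FILES)

    def save_model(self) -> None:
        # save_models accepts either a dict or a list of (file_name, object) pairs.
        self.save_models({MODEL_FILES[0]: self.data})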
34 changes: 11 additions & 23 deletions app/machine_learning/models/boosting_decision_maker.py
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import os
-import pickle
from typing import Any

from sklearn.metrics import classification_report, confusion_matrix
@@ -27,6 +25,9 @@

logger = logging.getLogger("analyzerApp.boosting_decision_maker")

+MODEL_FILES: list[str] = ['boost_model.pickle', 'data_features_config.pickle',
+                          'features_dict_with_saved_objects.pickle']


class BoostingDecisionMaker(MlModel):

@@ -40,7 +41,10 @@ def __init__(self, folder: str, n_estimators: int = 75, max_depth: int = 5, mono
                         monotonous_features)
        self.features_dict_with_saved_objects = {}
        if folder and folder.strip():
-            self.load_model()
+            boost_model, features_config, features_dict = self.load_model()
+            self.n_estimators, self.max_depth, self.xg_boost = boost_model
+            self.full_config, self.feature_ids, self.monotonous_features = features_config
+            self.features_dict_with_saved_objects = self.transform_feature_encoders_to_objects(features_dict)
        else:
            self.xg_boost = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=43)

@@ -81,28 +85,12 @@ def transform_feature_encoders_to_objects(self, features_dict_with_saved_objects
        return _features_dict_with_saved_objects

    def load_model(self):
-        with open(os.path.join(self.folder, "boost_model.pickle"), "rb") as f:
-            self.n_estimators, self.max_depth, self.xg_boost = pickle.load(f)
-        with open(os.path.join(self.folder, "data_features_config.pickle"), "rb") as f:
-            self.full_config, self.feature_ids, self.monotonous_features = pickle.load(f)
-        if os.path.exists(os.path.join(self.folder, "features_dict_with_saved_objects.pickle")):
-            features_dict_with_saved_objects = {}
-            with open(os.path.join(self.folder, "features_dict_with_saved_objects.pickle"), "rb") as f:
-                features_dict_with_saved_objects = pickle.load(f)
-            self.features_dict_with_saved_objects = self.transform_feature_encoders_to_objects(
-                features_dict_with_saved_objects)
-        else:
-            self.features_dict_with_saved_objects = {}
+        return self.load_models(MODEL_FILES)

    def save_model(self):
-        if not os.path.exists(self.folder):
-            os.makedirs(self.folder)
-        with open(os.path.join(self.folder, "boost_model.pickle"), "wb") as f:
-            pickle.dump([self.n_estimators, self.max_depth, self.xg_boost], f)
-        with open(os.path.join(self.folder, "data_features_config.pickle"), "wb") as f:
-            pickle.dump([self.full_config, self.feature_ids, self.monotonous_features], f)
-        with open(os.path.join(self.folder, "features_dict_with_saved_objects.pickle"), "wb") as f:
-            pickle.dump(self.transform_feature_encoders_to_dict(), f)
+        self.save_models(zip(MODEL_FILES, [[self.n_estimators, self.max_depth, self.xg_boost],
+                                           [self.full_config, self.feature_ids, self.monotonous_features],
+                                           self.transform_feature_encoders_to_dict()]))

    def train_model(self, train_data, labels):
        mon_features = [
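
For readability: the zip(...) argument built in the new save_model pairs each entry of MODEL_FILES with the object group written to it, so it is equivalent to passing these explicit (file name, object) pairs (spelled out for illustration, not part of the commit):

# Inside save_model, zip(MODEL_FILES, [...]) yields exactly these pairs:
self.save_models([
    ('boost_model.pickle', [self.n_estimators, self.max_depth, self.xg_boost]),
    ('data_features_config.pickle', [self.full_config, self.feature_ids, self.monotonous_features]),
    ('features_dict_with_saved_objects.pickle', self.transform_feature_encoders_to_dict()),
])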
37 changes: 3 additions & 34 deletions app/machine_learning/models/custom_boosting_decision_maker.py
@@ -12,45 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import os
from typing import Any

-from app.commons import logging
from app.commons.object_saving.object_saver import ObjectSaver
from app.machine_learning.models.boosting_decision_maker import BoostingDecisionMaker

-logger = logging.getLogger("analyzerApp.custom_boosting_decision_maker")


class CustomBoostingDecisionMaker(BoostingDecisionMaker):
-    project_id: int | str

    def __init__(self, folder: str, app_config: dict[str, Any], project_id: int | str):
-        super().__init__(folder=folder, tags='custom boosting model', app_config=app_config)
-        self.project_id = project_id

-    def load_model(self):
-        self.n_estimators, self.max_depth, self.xg_boost = self.object_saver.get_project_object(
-            os.path.join(self.folder, "boost_model"), self.project_id, using_json=False)
-        assert self.xg_boost is not None
-        self.full_config, self.feature_ids, self.monotonous_features = self.object_saver.get_project_object(
-            os.path.join(self.folder, "data_features_config"), self.project_id, using_json=False)
-        assert len(self.full_config) > 0
-        if self.object_saver.does_object_exists(os.path.join(self.folder, "features_dict_with_saved_objects"),
-                                                self.project_id):
-            features_dict_with_saved_objects = self.object_saver.get_project_object(
-                os.path.join(self.folder, "features_dict_with_saved_objects"), self.project_id, using_json=False)
-            self.features_dict_with_saved_objects = self.transform_feature_encoders_to_objects(
-                features_dict_with_saved_objects)
-        else:
-            self.features_dict_with_saved_objects = {}

-    def save_model(self):
-        self.object_saver.put_project_object([self.n_estimators, self.max_depth, self.xg_boost],
-                                             os.path.join(self.folder, "boost_model"), self.project_id,
-                                             using_json=False)
-        self.object_saver.put_project_object([self.full_config, self.feature_ids, self.monotonous_features],
-                                             os.path.join(self.folder, "data_features_config"), self.project_id,
-                                             using_json=False)
-        self.object_saver.put_project_object(self.transform_feature_encoders_to_dict(),
-                                             os.path.join(self.folder, "features_dict_with_saved_objects"),
-                                             self.project_id, using_json=False)
+        super().__init__(folder=folder, tags='custom boosting model', app_config=app_config,
+                         object_saver=ObjectSaver(app_config, project_id))
9 changes: 2 additions & 7 deletions app/machine_learning/models/custom_defect_type_model.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import os
from typing import Any

from app.commons.object_saving.object_saver import ObjectSaver
@@ -22,9 +21,5 @@
class CustomDefectTypeModel(DefectTypeModel):

    def __init__(self, folder: str, app_config: dict[str, Any], project_id: int | str):
-        super().__init__(folder, 'custom defect type model', object_saver=ObjectSaver(app_config, project_id))

-    def save_model(self):
-        self.object_saver.put_project_object(self.count_vectorizer_models,
-                                             os.path.join(self.folder, "count_vectorizer_models"), using_json=False)
-        self.object_saver.put_project_object(self.models, os.path.join(self.folder, "models"), using_json=False)
+        super().__init__(folder, 'custom defect type model', app_config=app_config,
+                         object_saver=ObjectSaver(app_config, project_id))
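
A rough construction sketch for the two project-scoped models (the folder names and project id are hypothetical, and app_config is left as an empty placeholder for the analyzer's real storage settings); note that both constructors immediately load previously saved objects through the injected ObjectSaver:

from typing import Any

from app.machine_learning.models.custom_boosting_decision_maker import CustomBoostingDecisionMaker
from app.machine_learning.models.custom_defect_type_model import CustomDefectTypeModel

app_config: dict[str, Any] = {}  # placeholder; real binary-storage settings come from the service config
project_id = 42                  # hypothetical project

# Assumes the corresponding pickles were already saved for this project.
boost_model = CustomBoostingDecisionMaker('custom_boost_model/', app_config, project_id)
defect_model = CustomDefectTypeModel('custom_defect_type_model/', app_config, project_id)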
18 changes: 9 additions & 9 deletions app/machine_learning/models/defect_type_model.py
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import os
-import pickle
from collections import Counter
from typing import Any

@@ -27,21 +25,23 @@
from app.machine_learning.models import MlModel
from app.utils import text_processing

+MODEL_FILES: list[str] = ['count_vectorizer_models.pickle', 'models.pickle']


class DefectTypeModel(MlModel):
    count_vectorizer_models: dict
    models: dict

    def __init__(self, folder: str, tags: str = 'global defect type model', object_saver: ObjectSaver = None,
                 app_config: dict[str, Any] = None) -> None:
        super().__init__(folder, tags, object_saver=object_saver, app_config=app_config)
-        self.count_vectorizer_models, self.models = self.load_model(
-            ['count_vectorizer_models.pickle', 'models.pickle'])
+        self.count_vectorizer_models, self.models = self.load_model()

+    def load_model(self) -> list[Any]:
+        return self.load_models(MODEL_FILES)

    def save_model(self):
-        os.makedirs(self.folder, exist_ok=True)
-        with open(os.path.join(self.folder, "count_vectorizer_models.pickle"), "wb") as f:
-            pickle.dump(self.count_vectorizer_models, f)
-        with open(os.path.join(self.folder, "models.pickle"), "wb") as f:
-            pickle.dump(self.models, f)
+        self.save_models(zip(MODEL_FILES, self.count_vectorizer_models, self.models))

    def train_model(self, name, train_data_x, labels):
        self.count_vectorizer_models[name] = TfidfVectorizer(
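
For comparison, MlModel.save_models also accepts a plain dict, so the same two pickles could be expressed in save_model as below (a sketch, not part of the commit):

# Dict form of the same save, written out inside save_model for illustration:
self.save_models({
    'count_vectorizer_models.pickle': self.count_vectorizer_models,
    'models.pickle': self.models,
})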
37 changes: 8 additions & 29 deletions app/machine_learning/models/weighted_similarity_calculator.py
@@ -13,55 +13,34 @@
# limitations under the License.

import math
-import os
-import pickle

import numpy as np

from app.machine_learning.models import MlModel
from app.utils import text_processing

+MODEL_FILES: list[str] = ['weights.pickle', 'config.pickle']


class WeightedSimilarityCalculator(MlModel):

    def __init__(self, folder, block_to_split=10, min_log_number_in_block=1):
        super().__init__(folder, 'global similarity model')
        self.block_to_split = block_to_split
        self.min_log_number_in_block = min_log_number_in_block
-        self.weights = None
-        self.softmax_weights = None
-        self.load_model()
+        weights, self.config = self.load_model()
+        self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights = weights

    def load_model(self):
-        if not os.path.exists(os.path.join(self.folder, "weights.pickle")):
-            return
-        with open(os.path.join(self.folder, "weights.pickle"), "rb") as f:
-            self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights = \
-                pickle.load(f)
-        if not os.path.exists(os.path.join(self.folder, "config.pickle")):
-            return
-        try:
-            with open(os.path.join(self.folder, "config.pickle"), "wb") as f:
-                self.config = pickle.load(f)
-        except:  # noqa
-            pass
+        return self.load_models(MODEL_FILES)

    def add_config_info(self, config):
        self.config = config

    def save_model(self):
-        if not os.path.exists(self.folder):
-            os.makedirs(self.folder)
-        if self.weights is not None:
-            with open(os.path.join(self.folder, "weights.pickle"), "wb") as f:
-                pickle.dump([self.block_to_split, self.min_log_number_in_block,
-                             self.weights, self.softmax_weights], f)
-        try:
-            if self.config:
-                with open(os.path.join(self.folder, "config.pickle"), "wb") as f:
-                    pickle.dump(self.config, f)
-        except:  # noqa
-            pass
+        self.save_models(zip(
+            MODEL_FILES,
+            [[self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights], self.config]))

    def message_to_array(self, detected_message_res, stacktrace_res):
        all_lines = [" ".join(text_processing.split_words(detected_message_res))]
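
Spelled out for readability (not part of the commit), the unpack in the new constructor relies on load_models returning one object per entry of MODEL_FILES, in order:

# Inside __init__, the two lines
#     weights, self.config = self.load_model()
#     self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights = weights
# expand, given MODEL_FILES = ['weights.pickle', 'config.pickle'], to:
loaded = self.load_models(MODEL_FILES)
weights = loaded[0]      # [block_to_split, min_log_number_in_block, weights, softmax_weights]
self.config = loaded[1]  # the object previously stored via add_config_info()
self.block_to_split, self.min_log_number_in_block, self.weights, self.softmax_weights = weights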
