v0.7.0 (#8)

fmind · Mar 16, 2024 · 7be3a29 · 7be3a29
1 parent 6228d0e
commit 7be3a29
Show file tree

Hide file tree

Showing 70 changed files with 5,633 additions and 1,585 deletions.
diff --git a/.gitignore b/.gitignore
@@ -19,10 +19,10 @@
 # Environs
 .env
 /.venv/
-poetry.lock
 
 # Project
 /docs/*
+/mlruns/*
 /outputs/*
 !**/.gitkeep
 

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -4,7 +4,6 @@
 default_language_version:
   python: python3.12
 repos:
-  # commons
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.5.0
     hooks:
@@ -17,22 +16,13 @@ repos:
       - id: end-of-file-fixer
       - id: mixed-line-ending
       - id: trailing-whitespace
-  - repo: github.com/PyCQA/bandit
-    rev: v1.7.7
-    hooks:
-      - id: bandit
-        args: ["--configfile=pyproject.toml"]
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.8.0
-    hooks:
-      - id: mypy
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.3.0
+    rev: v0.3.3
     hooks:
       - id: ruff
       - id: ruff-format
   - repo: https://github.com/commitizen-tools/commitizen
-    rev: v3.16.0
+    rev: v3.18.3
     hooks:
       - id: commitizen
       - id: commitizen-branch

diff --git a/confs/promotion.yaml b/confs/promotion.yaml
@@ -0,0 +1,2 @@
+job:
+  KIND: PromotionJob
diff --git a/confs/tuning.yaml b/confs/tuning.yaml
@@ -6,6 +6,3 @@ job:
   targets:
     KIND: ParquetReader
     path: data/targets.parquet
-  results:
-    KIND: ParquetWriter
-    path: outputs/results.parquet
diff --git a/mlops-python-package.code-workspace b/mlops-python-package.code-workspace
@@ -24,8 +24,7 @@
 			"dchanco.vsc-invoke",
 			"ms-python.mypy-type-checker",
 			"ms-python.python",
-			"ms-python.mypy-type-checker",
 			"ms-python.vscode-pylance",
 		]
 	}
-}
+}
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,10 +4,10 @@
 
 [tool.poetry]
 name = "bikes"
-version = "0.6.0"
+version = "0.7.0"
 description = "Predict the number of bikes available."
-documentation = "https://fmind.github.io/mlops-python-package/"
 repository = "https://github.com/fmind/mlops-python-package"
+documentation = "https://fmind.github.io/mlops-python-package/"
 authors = ["Médéric HURIER <[email protected]>"]
 readme = "README.md"
 license = "CC BY"
@@ -23,60 +23,55 @@ bikes = 'bikes.scripts:main'
 
 [tool.poetry.dependencies]
 python = "^3.12"
-codecarbon = "^2.3.4"
 loguru = "^0.7.2"
-mlflow-skinny = "^2.11.0"
+matplotlib = "^3.8.3"
+mlflow = "^2.11.0"
+numpy = "^1.26.4"
 omegaconf = "^2.3.0"
 pandas = "^2.2.1"
-pandera = "^0.18.0"
+pandera = "^0.18.3"
 plotly = "^5.19.0"
-pyarrow = "^15.0.0"
-pydantic = "^2.6.3"
+pyarrow = "^15.0.1"
+pydantic = "^2.6.4"
 pydantic-settings = "^2.2.1"
 scikit-learn = "^1.4.1"
-mlflow = "^2.11.0"
-
-[tool.poetry.group.dev.dependencies]
-invoke = "^2.2.0"
-
-[tool.poetry.group.docs.dependencies]
-pdoc = "^14.4.0"
 
 [tool.poetry.group.checks.dependencies]
-bandit = "^1.7.7"
+bandit = "^1.7.8"
 coverage = "^7.4.3"
-mypy = "^1.8.0"
-pytest = "^8.0.2"
+mypy = "^1.9.0"
+pytest = "^8.1.1"
 pytest-cov = "^4.1.0"
 pytest-xdist = "^3.5.0"
-pandera = { extras = ["mypy"], version = "^0.18.0" }
-ruff = "^0.3.0"
+pandera = { extras = ["mypy"], version = "^0.18.3" }
+ruff = "^0.3.3"
 
 [tool.poetry.group.commits.dependencies]
-commitizen = "^3.16.0"
+commitizen = "^3.18.3"
 pre-commit = "^3.6.2"
 
-[tool.poetry.group.carbons.dependencies]
-dash = "^2.16.0"
-dash-bootstrap-components = "^1.5.0"
-fire = "^0.5.0"
+[tool.poetry.group.dev.dependencies]
+invoke = "^2.2.0"
+
+[tool.poetry.group.docs.dependencies]
+pdoc = "^14.4.0"
 
 [tool.poetry.group.notebooks.dependencies]
 ipykernel = "^6.29.3"
-nbformat = "^5.9.2"
+nbformat = "^5.10.2"
 
 # CONFIGURATIONS
 
 [tool.bandit]
-skips = ["B101"]
+targets = ["src"]
 
 [tool.commitizen]
 name = "cz_conventional_commits"
 tag_format = "v$version"
 version_scheme = "pep440"
 version_provider = "poetry"
 changelog_start_rev = "v1.0.0"
-update_changelog_on_bump = true
+update_changelog_on_bump = false
 major_version_zero = true
 
 [tool.coverage.run]
@@ -92,6 +87,9 @@ check_untyped_defs = true
 ignore_missing_imports = true
 plugins = ["pandera.mypy", "pydantic.mypy"]
 
+[tool.pytest.ini_options]
+addopts = "--verbosity=2"
+
 [tool.ruff]
 fix = true
 line-length = 100
@@ -100,9 +98,6 @@ target-version = "py312"
 [tool.ruff.format]
 docstring-code-format = true
 
-[tool.ruff.lint]
-select = ["D"]
-
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 

diff --git a/src/bikes/core/__init__.py b/src/bikes/core/__init__.py
@@ -0,0 +1 @@
+"""Core components of the project."""
diff --git a/src/bikes/metrics.py → src/bikes/core/metrics.py b/src/bikes/metrics.py → src/bikes/core/metrics.py
@@ -1,4 +1,4 @@
-"""Evaluate model performance with metrics."""
+"""Evaluate model performances with metrics."""
 
 # %% IMPORTS
 
@@ -8,19 +8,19 @@
 import pydantic as pdt
 from sklearn import metrics
 
-from bikes import models, schemas
+from bikes.core import models, schemas
 
 # %% METRICS
 
 
-class Metric(abc.ABC, pdt.BaseModel, strict=True):
-    """Base class for a metric.
+class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
+    """Base class for a project metric.
 
     Use metrics to evaluate model performance.
     e.g., accuracy, precision, recall, MAE, F1, ...
 
-    Attributes:
-        name (str): name of the metric.
+    Parameters:
+        name (str): name of the metric for the reporting.
     """
 
     KIND: str
@@ -36,31 +36,31 @@ def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
             outputs (schemas.Outputs): predicted values.
 
         Returns:
-            float: single result for the metric computation.
+            float: single result from the metric computation.
         """
 
     def scorer(
         self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
     ) -> float:
-        """Score the model outputs against the targets.
+        """Score the model outputs against targets.
 
         Args:
             model (models.Model): model to evaluate.
             inputs (schemas.Inputs): model inputs values.
             targets (schemas.Targets): model expected values.
 
         Returns:
-            float: metric result.
+            float: single result from the metric computation.
         """
-        outputs = model.predict(inputs=inputs)  # prediction
+        outputs = model.predict(inputs=inputs)
         score = self.score(targets=targets, outputs=outputs)
         return score
 
 
 class SklearnMetric(Metric):
     """Compute metrics with sklearn.
 
-    Attributes:
+    Parameters:
         name (str): name of the sklearn metric.
         greater_is_better (bool): maximize or minimize.
     """

diff --git a/src/bikes/models.py → src/bikes/core/models.py b/src/bikes/models.py → src/bikes/core/models.py
@@ -8,19 +8,20 @@
 import pydantic as pdt
 from sklearn import compose, ensemble, pipeline, preprocessing
 
-from bikes import schemas
+from bikes.core import schemas
 
 # %% TYPES
 
+# Model params
 ParamKey = str
 ParamValue = T.Any
 Params = dict[ParamKey, ParamValue]
 
 # %% MODELS
 
 
-class Model(abc.ABC, pdt.BaseModel, strict=True):
-    """Base class for a model.
+class Model(abc.ABC, pdt.BaseModel, strict=True, frozen=False, extra="forbid"):
+    """Base class for a project model.
 
     Use a model to adapt AI/ML frameworks.
     e.g., to swap easily one model with another.
@@ -32,7 +33,7 @@ def get_params(self, deep: bool = True) -> Params:
         """Get the model params.
 
         Args:
-            deep (bool, optional): ignored. Defaults to True.
+            deep (bool, optional): ignored.
 
         Returns:
             Params: internal model parameters.
@@ -62,7 +63,7 @@ def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> T.Self:
             targets (schemas.Targets): model training targets.
 
         Returns:
-            Model: instance of the model.
+            T.Self: instance of the model.
         """
 
     @abc.abstractmethod
@@ -76,11 +77,22 @@ def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
             schemas.Outputs: model prediction outputs.
         """
 
+    def get_internal_model(self) -> T.Any:
+        """Return the internal model in the object.
+
+        Raises:
+            NotImplementedError: method not implemented.
+
+        Returns:
+            T.Any: any internal model (either empty or fitted).
+        """
+        raise NotImplementedError()
+
 
 class BaselineSklearnModel(Model):
-    """Simple baseline model with sklearn.
+    """Simple baseline model based on scikit-learn.
 
-    Attributes:
+    Parameters:
         max_depth (int): maximum depth of the random forest.
         n_estimators (int): number of estimators in the random forest.
         random_state (int, optional): random state of the machine learning pipeline.
@@ -142,12 +154,19 @@ def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSkle
 
     @T.override
     def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
-        assert self._pipeline is not None, "Model should be fitted first!"
-        prediction = self._pipeline.predict(inputs)  # return an np.ndarray
+        model = self.get_internal_model()
+        prediction = model.predict(inputs)
         outputs = schemas.Outputs(
             {schemas.OutputsSchema.prediction: prediction}, index=inputs.index
         )
         return outputs
 
+    @T.override
+    def get_internal_model(self) -> pipeline.Pipeline:
+        model = self._pipeline
+        if model is None:
+            raise ValueError("Model is not fitted yet!")
+        return model
+
 
 ModelKind = BaselineSklearnModel