Skip to content

Commit

Permalink
v0.7.0 (#8)
Browse files (browse the repository at this point in the history)
  • Loading branch information
fmind authored Mar 16, 2024
1 parent 6228d0e commit 7be3a29
Show file tree
Hide file tree
Showing 70 changed files with 5,633 additions and 1,585 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
# Environs
.env
/.venv/
poetry.lock

# Project
/docs/*
/mlruns/*
/outputs/*
!**/.gitkeep

Expand Down
14 changes: 2 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
default_language_version:
python: python3.12
repos:
# commons
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
Expand All @@ -17,22 +16,13 @@ repos:
- id: end-of-file-fixer
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/PyCQA/bandit
rev: v1.7.7
hooks:
- id: bandit
args: ["--configfile=pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
- id: mypy
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.0
rev: v0.3.3
hooks:
- id: ruff
- id: ruff-format
- repo: https://github.com/commitizen-tools/commitizen
rev: v3.16.0
rev: v3.18.3
hooks:
- id: commitizen
- id: commitizen-branch
Expand Down
2 changes: 2 additions & 0 deletions confs/promotion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
job:
KIND: PromotionJob
3 changes: 0 additions & 3 deletions confs/tuning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,3 @@ job:
targets:
KIND: ParquetReader
path: data/targets.parquet
results:
KIND: ParquetWriter
path: outputs/results.parquet
3 changes: 1 addition & 2 deletions mlops-python-package.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
"dchanco.vsc-invoke",
"ms-python.mypy-type-checker",
"ms-python.python",
"ms-python.mypy-type-checker",
"ms-python.vscode-pylance",
]
}
}
}
3,532 changes: 3,532 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

55 changes: 25 additions & 30 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

[tool.poetry]
name = "bikes"
version = "0.6.0"
version = "0.7.0"
description = "Predict the number of bikes available."
documentation = "https://fmind.github.io/mlops-python-package/"
repository = "https://github.com/fmind/mlops-python-package"
documentation = "https://fmind.github.io/mlops-python-package/"
authors = ["Médéric HURIER <[email protected]>"]
readme = "README.md"
license = "CC BY"
Expand All @@ -23,60 +23,55 @@ bikes = 'bikes.scripts:main'

[tool.poetry.dependencies]
python = "^3.12"
codecarbon = "^2.3.4"
loguru = "^0.7.2"
mlflow-skinny = "^2.11.0"
matplotlib = "^3.8.3"
mlflow = "^2.11.0"
numpy = "^1.26.4"
omegaconf = "^2.3.0"
pandas = "^2.2.1"
pandera = "^0.18.0"
pandera = "^0.18.3"
plotly = "^5.19.0"
pyarrow = "^15.0.0"
pydantic = "^2.6.3"
pyarrow = "^15.0.1"
pydantic = "^2.6.4"
pydantic-settings = "^2.2.1"
scikit-learn = "^1.4.1"
mlflow = "^2.11.0"

[tool.poetry.group.dev.dependencies]
invoke = "^2.2.0"

[tool.poetry.group.docs.dependencies]
pdoc = "^14.4.0"

[tool.poetry.group.checks.dependencies]
bandit = "^1.7.7"
bandit = "^1.7.8"
coverage = "^7.4.3"
mypy = "^1.8.0"
pytest = "^8.0.2"
mypy = "^1.9.0"
pytest = "^8.1.1"
pytest-cov = "^4.1.0"
pytest-xdist = "^3.5.0"
pandera = { extras = ["mypy"], version = "^0.18.0" }
ruff = "^0.3.0"
pandera = { extras = ["mypy"], version = "^0.18.3" }
ruff = "^0.3.3"

[tool.poetry.group.commits.dependencies]
commitizen = "^3.16.0"
commitizen = "^3.18.3"
pre-commit = "^3.6.2"

[tool.poetry.group.carbons.dependencies]
dash = "^2.16.0"
dash-bootstrap-components = "^1.5.0"
fire = "^0.5.0"
[tool.poetry.group.dev.dependencies]
invoke = "^2.2.0"

[tool.poetry.group.docs.dependencies]
pdoc = "^14.4.0"

[tool.poetry.group.notebooks.dependencies]
ipykernel = "^6.29.3"
nbformat = "^5.9.2"
nbformat = "^5.10.2"

# CONFIGURATIONS

[tool.bandit]
skips = ["B101"]
targets = ["src"]

[tool.commitizen]
name = "cz_conventional_commits"
tag_format = "v$version"
version_scheme = "pep440"
version_provider = "poetry"
changelog_start_rev = "v1.0.0"
update_changelog_on_bump = true
update_changelog_on_bump = false
major_version_zero = true

[tool.coverage.run]
Expand All @@ -92,6 +87,9 @@ check_untyped_defs = true
ignore_missing_imports = true
plugins = ["pandera.mypy", "pydantic.mypy"]

[tool.pytest.ini_options]
addopts = "--verbosity=2"

[tool.ruff]
fix = true
line-length = 100
Expand All @@ -100,9 +98,6 @@ target-version = "py312"
[tool.ruff.format]
docstring-code-format = true

[tool.ruff.lint]
select = ["D"]

[tool.ruff.lint.pydocstyle]
convention = "google"

Expand Down
1 change: 1 addition & 0 deletions src/bikes/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Core components of the project."""
22 changes: 11 additions & 11 deletions src/bikes/metrics.py → src/bikes/core/metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Evaluate model performance with metrics."""
"""Evaluate model performances with metrics."""

# %% IMPORTS

Expand All @@ -8,19 +8,19 @@
import pydantic as pdt
from sklearn import metrics

from bikes import models, schemas
from bikes.core import models, schemas

# %% METRICS


class Metric(abc.ABC, pdt.BaseModel, strict=True):
"""Base class for a metric.
class Metric(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
"""Base class for a project metric.
Use metrics to evaluate model performance.
e.g., accuracy, precision, recall, MAE, F1, ...
Attributes:
name (str): name of the metric.
Parameters:
name (str): name of the metric for the reporting.
"""

KIND: str
Expand All @@ -36,31 +36,31 @@ def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float:
outputs (schemas.Outputs): predicted values.
Returns:
float: single result for the metric computation.
float: single result from the metric computation.
"""

def scorer(
self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets
) -> float:
"""Score the model outputs against the targets.
"""Score the model outputs against targets.
Args:
model (models.Model): model to evaluate.
inputs (schemas.Inputs): model inputs values.
targets (schemas.Targets): model expected values.
Returns:
float: metric result.
float: single result from the metric computation.
"""
outputs = model.predict(inputs=inputs) # prediction
outputs = model.predict(inputs=inputs)
score = self.score(targets=targets, outputs=outputs)
return score


class SklearnMetric(Metric):
"""Compute metrics with sklearn.
Attributes:
Parameters:
name (str): name of the sklearn metric.
greater_is_better (bool): maximize or minimize.
"""
Expand Down
37 changes: 28 additions & 9 deletions src/bikes/models.py → src/bikes/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
import pydantic as pdt
from sklearn import compose, ensemble, pipeline, preprocessing

from bikes import schemas
from bikes.core import schemas

# %% TYPES

# Model params
ParamKey = str
ParamValue = T.Any
Params = dict[ParamKey, ParamValue]

# %% MODELS


class Model(abc.ABC, pdt.BaseModel, strict=True):
"""Base class for a model.
class Model(abc.ABC, pdt.BaseModel, strict=True, frozen=False, extra="forbid"):
"""Base class for a project model.
Use a model to adapt AI/ML frameworks.
e.g., to swap easily one model with another.
Expand All @@ -32,7 +33,7 @@ def get_params(self, deep: bool = True) -> Params:
"""Get the model params.
Args:
deep (bool, optional): ignored. Defaults to True.
deep (bool, optional): ignored.
Returns:
Params: internal model parameters.
Expand Down Expand Up @@ -62,7 +63,7 @@ def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> T.Self:
targets (schemas.Targets): model training targets.
Returns:
Model: instance of the model.
T.Self: instance of the model.
"""

@abc.abstractmethod
Expand All @@ -76,11 +77,22 @@ def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
schemas.Outputs: model prediction outputs.
"""

def get_internal_model(self) -> T.Any:
"""Return the internal model in the object.
Raises:
NotImplementedError: method not implemented.
Returns:
T.Any: any internal model (either empty or fitted).
"""
raise NotImplementedError()


class BaselineSklearnModel(Model):
"""Simple baseline model with sklearn.
"""Simple baseline model based on scikit-learn.
Attributes:
Parameters:
max_depth (int): maximum depth of the random forest.
n_estimators (int): number of estimators in the random forest.
random_state (int, optional): random state of the machine learning pipeline.
Expand Down Expand Up @@ -142,12 +154,19 @@ def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSkle

@T.override
def predict(self, inputs: schemas.Inputs) -> schemas.Outputs:
assert self._pipeline is not None, "Model should be fitted first!"
prediction = self._pipeline.predict(inputs) # return an np.ndarray
model = self.get_internal_model()
prediction = model.predict(inputs)
outputs = schemas.Outputs(
{schemas.OutputsSchema.prediction: prediction}, index=inputs.index
)
return outputs

@T.override
def get_internal_model(self) -> pipeline.Pipeline:
model = self._pipeline
if model is None:
raise ValueError("Model is not fitted yet!")
return model


ModelKind = BaselineSklearnModel
Loading

0 comments on commit 7be3a29

Please sign in to comment.