From 9fef2739c244044bb34a42413f42df3b84de3c9a Mon Sep 17 00:00:00 2001 From: Lukas Fehring Date: Tue, 21 Nov 2023 10:21:31 +0100 Subject: [PATCH] Commit everthing to reset pc --- conditional_example.py | 104 ++++++++++++ conditional_example.yml | 25 +++ example.yml | 46 ++++++ general_example.py | 151 ++++++++++++++++++ logtable_example.py | 101 ++++++++++++ logtable_example.yml | 37 +++++ .../configs/integration_test_mysql.yml | 29 ++++ .../configs/integration_test_sqlite.yml | 29 ++++ .../configs/test_config_mysql.yml | 31 ++++ .../configs/test_config_sqlite.yml | 31 ++++ .../test_codecarbon_core_functions_mysql.py | 39 +---- .../test_codecarbon_core_functions_sqlite.py | 40 +---- .../test_codecarbon/test_integration_mysql.py | 31 +--- .../test_integration_sqlite.py | 28 +--- 14 files changed, 607 insertions(+), 115 deletions(-) create mode 100644 conditional_example.py create mode 100644 conditional_example.yml create mode 100644 example.yml create mode 100644 general_example.py create mode 100644 logtable_example.py create mode 100644 logtable_example.yml create mode 100644 test/test_codecarbon/configs/integration_test_mysql.yml create mode 100644 test/test_codecarbon/configs/integration_test_sqlite.yml create mode 100644 test/test_codecarbon/configs/test_config_mysql.yml create mode 100644 test/test_codecarbon/configs/test_config_sqlite.yml diff --git a/conditional_example.py b/conditional_example.py new file mode 100644 index 00000000..8847036f --- /dev/null +++ b/conditional_example.py @@ -0,0 +1,104 @@ +# %% [markdown] +# # Example: Conditional Parameter Grids +# +# This example shows the usage of `PyExperimenter` with a conditional parameter grid. We will programmatically define the parameter combinations of a support vector machine, instead of generating the entire cartesian product from the parameters defined in the config file. 
+#
+# To execute this notebook you need to install:
+# ```
+# pip install py_experimenter
+# pip install scikit-learn
+# ```
+
+# %% [markdown]
+# ## Experiment Configuration File
+# This notebook shows an example execution of `PyExperimenter` based on an experiment configuration file. Further explanation about the usage of `PyExperimenter` can be found in the [documentation](https://tornede.github.io/py_experimenter/usage.html). Here, we only define keyfields and resultfields and do not set the parameter values in the experiment configuration file as we will create the parameter grid programmatically.
+
+import random
+from random import randint
+from time import sleep
+
+import numpy as np
+from sklearn.datasets import load_iris
+from sklearn.model_selection import cross_validate
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+
+from py_experimenter.experimenter import PyExperimenter
+# %%
+from py_experimenter.result_processor import ResultProcessor
+
+
+def run_svm(parameters: dict, result_processor: ResultProcessor, custom_config: dict):
+    """Cross-validate a kernel-specific SVM on iris and store the mean train/test scores."""
+    sleep(randint(0, 5))
+    seed = parameters['seed']
+    random.seed(seed)
+    np.random.seed(seed)
+
+    data = load_iris()
+    X, y = data.data, data.target
+
+    # Create Support Vector Machine with only the hyperparameters relevant for the kernel
+    kernel = parameters['kernel']
+    if kernel == 'linear':
+        svc = SVC(kernel=kernel)
+    elif kernel == 'poly':
+        svc = SVC(kernel=kernel, gamma=parameters['gamma'], coef0=parameters['coef0'], degree=parameters['degree'])
+    elif kernel == 'rbf':
+        svc = SVC(kernel=kernel, gamma=parameters['gamma'])
+    else:
+        raise ValueError(f"Unsupported kernel: {kernel}")  # never fall back to a default SVC silently
+
+    model = make_pipeline(StandardScaler(), svc)
+
+    if parameters['dataset'] != 'iris':
+        raise ValueError("Example error")
+
+    scores = cross_validate(
+        model, X, y, cv=parameters['cross_validation_splits'],
+        scoring=('accuracy', 'f1_micro'),
+        return_train_score=True,
+    )
+
+    result_processor.process_results({
+        'train_f1': np.mean(scores['train_f1_micro']),
+        'train_accuracy': np.mean(scores['train_accuracy']),
+    })
+
+    result_processor.process_results({
+        'test_f1': np.mean(scores['test_f1_micro']),
+        'test_accuracy': np.mean(scores['test_accuracy']),
+    })
+
+
+experimenter = PyExperimenter(experiment_configuration_file_path='conditional_example.yml', name="SVM_experimenter_01")
+
+combinations = [{'kernel': 'rbf', 'gamma': gamma, 'degree': None, 'coef0': None} for gamma in ['0.1', '0.3']]
+combinations += [{'kernel': 'poly', 'gamma': gamma, 'degree': degree, 'coef0': coef0}
+                 for gamma in ['0.1', '0.3'] for degree in ['3', '4'] for coef0 in ['0.0', '0.1']]
+combinations += [{'kernel': 'linear', 'gamma': None, 'degree': None, 'coef0': None}]
+
+# Fill experimenter
+experimenter.fill_table_from_combination(parameters={'seed': ['1', '2', '3', '4', '5'],
+                                                     'dataset': ['iris'],
+                                                     'cross_validation_splits': ['5']},
+                                         fixed_parameter_combinations=combinations)
+
+# showing database table
+experimenter.get_table()
+
+# %% [markdown]
+# ### Execute PyExperimenter
+# All experiments are executed one after the other by the same `PyExperimenter` due to `max_experiments=-1`. If just a single one or a predefined number of experiments should be executed, the `-1` has to be replaced by the according amount.
+# The first parameter, i.e. `run_svm`, relates to the actual method that should be executed with the given keyfields of the table.
+
+# %%
+experimenter.execute(run_svm, max_experiments=-1)
+
+# showing database table
+experimenter.get_table()
+
+# %% [markdown]
+# ### CodeCarbon
+# Note that `CodeCarbon` is activated by default, collecting information about the carbon emissions of each experiment.
Have a look at our [general usage example](https://tornede.github.io/py_experimenter/examples/example_general_usage.html) and the according [documentation of CodeCarbon fields](https://tornede.github.io/py_experimenter/usage.html#codecarbon-fields) for more information. diff --git a/conditional_example.yml b/conditional_example.yml new file mode 100644 index 00000000..6d180444 --- /dev/null +++ b/conditional_example.yml @@ -0,0 +1,25 @@ +PY_EXPERIMENTER: + n_jobs: 1 + + Database: + provider: sqlite + database: py_experimenter + table: + name: example_conditional_grid + keyfields: + - dataset + - cross_validation_splits: int + - seed: int + - kernel + - gamma: DECIMAL + - degree: int + - coef0: DECIMAL + result_timestamps: false + resultfields: + - train_f1: DECIMAL + - train_accuracy: DECIMAL + - test_f1: DECIMAL + - test_accuracy: DECIMAL + + CUSTOM: + path: sample_data diff --git a/example.yml b/example.yml new file mode 100644 index 00000000..476b18e5 --- /dev/null +++ b/example.yml @@ -0,0 +1,46 @@ +PY_EXPERIMENTER: + n_jobs: 1 + + Database: + provider: sqlite + database: py_experimenter + table: + name: example_general_usage + keyfields: + - dataset + - cross_validation_splits: int + - seed: int + - kernel + result_timestamps: False + resultfields: + - pipeline: LONGTEXT + - train_f1: DECIMAL + - train_accuracy: DECIMAL + - test_f1: DECIMAL + - test_accuracy: DECIMAL + + Experiments: + dataset: + - iris + cross_validation_splits: + - 5 + seed: + - 2 + - 4 + - 6 + kernel: + - linear + - poly + - rbf + - sigmoid + + Custom: + datapath: sample_data + + CodeCarbon: + offline_mode: False + measure_power_secs: 25 + tracking_mode: process + log_level: error + save_to_file: True + output_dir: output/CodeCarbon \ No newline at end of file diff --git a/general_example.py b/general_example.py new file mode 100644 index 00000000..66ff97e2 --- /dev/null +++ b/general_example.py @@ -0,0 +1,151 @@ +# %% [markdown] +# # Example: General Usage +# +# This example shows the 
general usage of `PyExperimenter`, from creating an experiment configuration file, over the actual execution of (dummy) experiments, to the extraction of experimental results.
+#
+# To execute this notebook you need to install:
+# ```
+# pip install py_experimenter
+# pip install scikit-learn
+# ```
+
+# %% [markdown]
+# ## Experiment Configuration File
+# This notebook shows an example execution of `PyExperimenter` based on an experiment configuration file. Further explanation about the usage of `PyExperimenter` can be found in the [documentation](https://tornede.github.io/py_experimenter/usage.html).
+
+# %%
+from py_experimenter.experimenter import PyExperimenter
+from py_experimenter.result_processor import ResultProcessor
+from sklearn.svm import SVC
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import cross_validate
+from sklearn.datasets import load_iris
+import numpy as np
+import random
+import os
+
+
+def run_ml(parameters: dict, result_processor: ResultProcessor, custom_config: dict):
+    """Fit a scaled SVC on iris and store the pipeline description and its mean CV scores."""
+    seed = parameters['seed']
+    random.seed(seed)
+    np.random.seed(seed)
+
+    data = load_iris()
+    # In case you want to load a file from a path (needs `import pandas as pd`)
+    # path = os.path.join(custom_config['datapath'], parameters['dataset'])
+    # data = pd.read_csv(path)
+    features, targets = data.data, data.target
+
+    model = make_pipeline(StandardScaler(), SVC(kernel=parameters['kernel'], gamma='auto'))
+    result_processor.process_results({'pipeline': str(model)})
+
+    if parameters['dataset'] != 'iris':
+        raise ValueError("Example error")
+
+    scores = cross_validate(
+        model, features, targets,
+        cv=parameters['cross_validation_splits'],
+        scoring=('accuracy', 'f1_micro'),
+        return_train_score=True,
+    )
+
+    train_results = {
+        'train_f1': np.mean(scores['train_f1_micro']),
+        'train_accuracy': np.mean(scores['train_accuracy']),
+    }
+    result_processor.process_results(train_results)
+
+    test_results = {
+        'test_f1': np.mean(scores['test_f1_micro']),
+        'test_accuracy': np.mean(scores['test_accuracy']),
+    }
+    result_processor.process_results(test_results)
+
+
+experimenter = PyExperimenter(experiment_configuration_file_path="example.yml", name='example_notebook')
+
+
+# %%
+experimenter.fill_table_from_config()
+
+experimenter.fill_table_with_rows(rows=[
+    {'dataset': 'error_dataset', 'cross_validation_splits': 3, 'seed': 42, 'kernel': 'linear'}])
+
+# showing database table
+experimenter.get_table()
+
+# %% [markdown]
+# ### Execute PyExperimenter
+# All experiments are executed one after the other by the same `PyExperimenter` due to `max_experiments=-1`. If just a single one or a predefined number of experiments should be executed, the `-1` has to be replaced by the according amount.
+#
+# The first parameter, i.e. `run_ml`, relates to the actual method that should be executed with the given keyfields of the table.
+
+# %%
+experimenter.execute(run_ml, max_experiments=-1)
+
+# showing database table
+experimenter.get_table()
+
+# %% [markdown]
+# ### Restart Failed Experiments
+#
+# Since experiments can fail at some point, those experiments can be reset for another try with `reset_experiments()`. The `status` describes which table rows should be replaced. In this example all failed experiments, i.e. having `status==error`, are reset. Experiments can also be reset based on multiple status by simply passing a list of status, e.g. `experimenter.reset_experiments('error', 'done')`. In that case, all experiments with status 'error' or 'done' will be reset.
+
+# %%
+experimenter.reset_experiments('error')
+
+# showing database table
+experimenter.get_table()
+
+# %% [markdown]
+# After the reset of failed experiments, they can be executed again as described above.
+
+# %%
+experimenter.execute(run_ml, max_experiments=-1)
+
+# showing database table
+experimenter.get_table()
+
+# %% [markdown]
+# ### Generating Result Table
+#
+#
+# The table contains single experiment results. Those can be aggregated, e.g. to generate the mean over all seeds.
+ +# %% +result_table_agg = experimenter.get_table().groupby(['dataset']).mean(numeric_only=True) +result_table_agg + +# %% [markdown] +# ### Printing LaTeX Table +# +# As `pandas.DataFrame`s can easily be printed as a LaTeX table, here is example code for one of the above result columns. + +# %% +print(result_table_agg[['test_f1']].style.to_latex()) + +# %% [markdown] +# ### CodeCarbon +# [CodeCarbon](https://tornede.github.io/py_experimenter/usage/experiment_configuration_file.html#codecarbon) is integrated into `PyExperimenter` to provide information about the carbon emissions of experiments. `CodeCarbon` will create a table with suffix `_codecarbon` in the database, each row containing information about the carbon emissions of a single experiment. + +# %% +experimenter.get_codecarbon_table() + +# %% [markdown] +# #### Aggregating CodeCarbon Results +# +# The carbon emission information of `CodeCarbon` can be easily aggregated via `pandas.DataFrame`. + +# %% +carbon_emissions = experimenter.get_codecarbon_table().groupby(['project_name']).sum(numeric_only=True) +carbon_emissions + +# %% [markdown] +# #### Printing CodeCarbon Results as LaTeX Table +# +# Furthermore, the resulting `pandas.DataFrame` can easily be printed as a LaTeX table. 
+
+# %%
+print(carbon_emissions[['energy_consumed_kw', 'emissions_kg']].style.to_latex())
diff --git a/logtable_example.py b/logtable_example.py
new file mode 100644
index 00000000..e781c02a
--- /dev/null
+++ b/logtable_example.py
@@ -0,0 +1,101 @@
+from py_experimenter.experimenter import PyExperimenter
+from py_experimenter.result_processor import ResultProcessor
+from sklearn.svm import SVC
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import cross_validate
+from sklearn.datasets import load_iris
+import numpy as np
+import random
+import os
+
+def run_ml(parameters: dict, result_processor: ResultProcessor, custom_config: dict):
+    """Evaluate one SVC per kernel on iris, log every kernel's scores, store the best kernels."""
+    seed = parameters['seed']
+
+    # Initialize the best score and kernel seen so far for each metric
+    best_f1 = 0
+    best_kernel_f1 = ''
+    best_accuracy = 0
+    best_kernel_accuracy = ''
+
+    for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
+        # Set seed for reproducibility
+        random.seed(seed)
+        np.random.seed(seed)
+
+        data = load_iris()
+        features, targets = data.data, data.target
+
+        model = make_pipeline(StandardScaler(), SVC(kernel=kernel, gamma='auto'))
+        scores = cross_validate(
+            model, features, targets,
+            cv=parameters['cross_validation_splits'],
+            scoring=('accuracy', 'f1_micro'),
+            return_train_score=True,
+        )
+        test_f1 = np.mean(scores['test_f1_micro'])
+        test_accuracy = np.mean(scores['test_accuracy'])
+
+        # Log scores to logtables
+        result_processor.process_logs({
+            'train_scores': {
+                'f1': np.mean(scores['train_f1_micro']),
+                'accuracy': np.mean(scores['train_accuracy']),
+                'kernel': "'" + kernel + "'",
+            },
+            'test_f1': {'test_f1': test_f1},
+            'test_accuracy': {'test_accuracy': test_accuracy},
+        })
+
+        # Strict '>' keeps the first kernel when several kernels tie
+        if test_f1 > best_f1:
+            best_f1 = test_f1
+            best_kernel_f1 = kernel
+        if test_accuracy > best_accuracy:
+            best_accuracy = test_accuracy
+            best_kernel_accuracy = kernel
+
+    result_processor.process_results({
+        'best_kernel_f1': best_kernel_f1,
+        'best_kernel_accuracy': best_kernel_accuracy,
+    })
+
+experimenter = PyExperimenter(experiment_configuration_file_path='logtable_example.yml', name='example_notebook')
+experimenter.fill_table_from_config()
+
+experimenter.get_table()
+
+# %%
+# Read one of the logtables
+experimenter.get_logtable('train_scores')
+
+# %% [markdown]
+# ## Run Experiments
+#
+# All experiments are executed sequentially by the same `PyExperimenter` due to `max_experiments=-1` and `n_jobs: 1` in the configuration file. If just a single one or a predefined number of experiments should be executed, the `-1` has to be replaced by the corresponding amount.
+#
+# The first parameter, i.e. `run_ml`, relates to the actual method that should be executed with the given keyfields of the table.
+
+# %%
+experimenter.execute(run_ml, max_experiments=-1)
+
+# %% [markdown]
+# ## Check Results
+# The content of all database tables having keyfields and resultfields, as well as every logtable can be easily obtained.
+
+# %%
+experimenter.get_table()
+
+# %%
+experimenter.get_logtable('train_scores')
+
+# %%
+experimenter.get_logtable('test_f1')
+
+# %%
+experimenter.get_logtable('test_accuracy')
+
+# %% [markdown]
+# ### CodeCarbon
+# Note that `CodeCarbon` is activated by default, collecting information about the carbon emissions of each experiment. Have a look at our [general usage example](https://tornede.github.io/py_experimenter/examples/example_general_usage.html) and the according [documentation of CodeCarbon fields](https://tornede.github.io/py_experimenter/usage.html#codecarbon-fields) for more information.
diff --git a/logtable_example.yml b/logtable_example.yml new file mode 100644 index 00000000..1897a3c6 --- /dev/null +++ b/logtable_example.yml @@ -0,0 +1,37 @@ +PY_EXPERIMENTER: + n_jobs : 1 + + Database: + provider: sqlite + database: py_experimenter + table: + name: example_logtables + keyfields: + - dataset + - cross_validation_splits: int + - seed: int + result_timestamps: false + resultfields: + - best_kernel_f1: VARCHAR(50) + - best_kernel_accuracy: VARCHAR(50) + logtables: + train_scores: + - f1: DOUBLE + - accuracy: DOUBLE + - kernel: VARCHAR(50) + test_f1: DOUBLE + test_accuracy: DOUBLE + Experiments: + dataset: + - iris + cross_validation_splits: + - 5 + seed: + - 1 + - 2 + - 3 + - 4 + - 5 + + CUSTOM: + path: sample_data diff --git a/test/test_codecarbon/configs/integration_test_mysql.yml b/test/test_codecarbon/configs/integration_test_mysql.yml new file mode 100644 index 00000000..1b22888f --- /dev/null +++ b/test/test_codecarbon/configs/integration_test_mysql.yml @@ -0,0 +1,29 @@ +PY_EXPERIMENTER: + n_jobs: 1 + + Database: + provider: mysql + database: py_experimenter + table: + name: integration_test_mysql + keyfields: + - dataset + - cross_validation_splits: int + - seed: int + - kernel + result_timestamps: false + resultfields: + - pipeline: LONGTEXT + - train_f1: DECIMAL + - train_accuracy: DECIMAL + - test_f1: DECIMAL + - test_accuracy: DECIMAL + Experiments: + seed: [2,4,6] + dataset: iris + cross_validation_splits: 5 + kernel: + - linear + - poly + - rbf + - sigmoid \ No newline at end of file diff --git a/test/test_codecarbon/configs/integration_test_sqlite.yml b/test/test_codecarbon/configs/integration_test_sqlite.yml new file mode 100644 index 00000000..4a53f1b6 --- /dev/null +++ b/test/test_codecarbon/configs/integration_test_sqlite.yml @@ -0,0 +1,29 @@ +PY_EXPERIMENTER: + n_jobs: 1 + + Database: + provider: sqlite + database: py_experimenter + table: + name: integration_test_sqlite + keyfields: + - dataset + - cross_validation_splits: int 
+ - seed: int + - kernel + result_timestamps: false + resultfields: + - pipeline: LONGTEXT + - train_f1: DECIMAL + - train_accuracy: DECIMAL + - test_f1: DECIMAL + - test_accuracy: DECIMAL + Experiments: + seed: [2,4,6] + dataset: iris + cross_validation_splits: 5 + kernel: + - linear + - poly + - rbf + - sigmoid \ No newline at end of file diff --git a/test/test_codecarbon/configs/test_config_mysql.yml b/test/test_codecarbon/configs/test_config_mysql.yml new file mode 100644 index 00000000..32c802db --- /dev/null +++ b/test/test_codecarbon/configs/test_config_mysql.yml @@ -0,0 +1,31 @@ +PY_EXPERIMENTER: + n_jobs: 5 + + Database: + provider: mysql + database: py_experimenter + table: + name: example_logtables + keyfields: + - dataset + - cross_validation_splits: int + - seed: int + result_timestamps: false + resultfields: + - best_kernel_f1: VARCHAR(50) + - best_kernel_accuracy: VARCHAR(50) + logtables: + train_scores: + f1: DOUBLE + accuracy: DOUBLE + kernel: VARCHAR(50) + test_f1: DOUBLE + test_accuracy: DOUBLE + + Experiments: + seed: [1,2,3,4,5] + dataset: iris + cross_validation_splits: 5 + + Custom: + path: sample_data \ No newline at end of file diff --git a/test/test_codecarbon/configs/test_config_sqlite.yml b/test/test_codecarbon/configs/test_config_sqlite.yml new file mode 100644 index 00000000..32c802db --- /dev/null +++ b/test/test_codecarbon/configs/test_config_sqlite.yml @@ -0,0 +1,31 @@ +PY_EXPERIMENTER: + n_jobs: 5 + + Database: + provider: sqlite + database: py_experimenter + table: + name: example_logtables + keyfields: + - dataset + - cross_validation_splits: int + - seed: int + result_timestamps: false + resultfields: + - best_kernel_f1: VARCHAR(50) + - best_kernel_accuracy: VARCHAR(50) + logtables: + train_scores: + f1: DOUBLE + accuracy: DOUBLE + kernel: VARCHAR(50) + test_f1: DOUBLE + test_accuracy: DOUBLE + + Experiments: + seed: [1,2,3,4,5] + dataset: iris + cross_validation_splits: 5 + + Custom: + path: sample_data \ No newline at end of 
file diff --git a/test/test_codecarbon/test_codecarbon_core_functions_mysql.py b/test/test_codecarbon/test_codecarbon_core_functions_mysql.py index 895ff739..464eb5b3 100644 --- a/test/test_codecarbon/test_codecarbon_core_functions_mysql.py +++ b/test/test_codecarbon/test_codecarbon_core_functions_mysql.py @@ -11,35 +11,13 @@ @pytest.fixture(scope='module') def experimenter_mysql(): # Create config directory if it does not exist + # Create config directory if it does not exist if not os.path.exists('config'): os.mkdir('config') - # Create config file - content = """ - [PY_EXPERIMENTER] - provider = mysql - database = py_experimenter - table = example_logtables - - keyfields = dataset, cross_validation_splits:int, seed:int - dataset = iris - cross_validation_splits = 5 - seed = 1,2,3,4,5 - - resultfields = best_kernel_f1:VARCHAR(50), best_kernel_accuracy:VARCHAR(50) - resultfields.timestamps = false - - logtables = train_scores:log_train_scores, test_f1:DOUBLE, test_accuracy:DOUBLE - log_train_scores = f1:DOUBLE, accuracy:DOUBLE, kernel:VARCHAR(50) - - [CUSTOM] - path = sample_data - """ - experiment_configuration = os.path.join('config', 'example_logtables.cfg') - with open(experiment_configuration, "w") as f: - f.write(content) + configuration_path = os.path.join('test', 'test_codecarbon', 'configs','test_config_mysql.yml') - experimenter = PyExperimenter(experiment_configuration_file_path=experiment_configuration, name='example_notebook') + experimenter = PyExperimenter(experiment_configuration_file_path=configuration_path,) yield experimenter experimenter.delete_table() @@ -53,12 +31,11 @@ def test_delete_table_mysql(experimenter_mysql): with patch.object(DatabaseConnector, 'execute', return_value=None) as mock_execute: experimenter_mysql.delete_table() - assert mock_execute.call_count == 5 - assert mock_execute.call_args_list[0][0][1] == 'DROP TABLE IF EXISTS example_logtables__train_scores' - assert mock_execute.call_args_list[1][0][1] == 'DROP TABLE IF EXISTS 
example_logtables__test_f1' - assert mock_execute.call_args_list[2][0][1] == 'DROP TABLE IF EXISTS example_logtables__test_accuracy' - assert mock_execute.call_args_list[3][0][1] == 'DROP TABLE IF EXISTS example_logtables_codecarbon' - assert mock_execute.call_args_list[4][0][1] == 'DROP TABLE IF EXISTS example_logtables' + assert mock_execute.call_count == 4 + assert mock_execute.call_args_list[0][0][1] == 'DROP TABLE IF EXISTS example_logtables__test_f1' + assert mock_execute.call_args_list[1][0][1] == 'DROP TABLE IF EXISTS example_logtables__test_accuracy' + assert mock_execute.call_args_list[2][0][1] == 'DROP TABLE IF EXISTS example_logtables_codecarbon' + assert mock_execute.call_args_list[3][0][1] == 'DROP TABLE IF EXISTS example_logtables' def test_get_table_mysql(experimenter_mysql): diff --git a/test/test_codecarbon/test_codecarbon_core_functions_sqlite.py b/test/test_codecarbon/test_codecarbon_core_functions_sqlite.py index 3da5af55..ae0043d0 100644 --- a/test/test_codecarbon/test_codecarbon_core_functions_sqlite.py +++ b/test/test_codecarbon/test_codecarbon_core_functions_sqlite.py @@ -6,7 +6,7 @@ from py_experimenter.database_connector import DatabaseConnector from py_experimenter.experimenter import PyExperimenter - +from omegaconf import OmegaConf @pytest.fixture(scope='module') def experimenter_sqlite(): @@ -14,32 +14,9 @@ def experimenter_sqlite(): if not os.path.exists('config'): os.mkdir('config') - # Create config file - content = """ - [PY_EXPERIMENTER] - provider = sqlite - database = py_experimenter - table = example_logtables - - keyfields = dataset, cross_validation_splits:int, seed:int - dataset = iris - cross_validation_splits = 5 - seed = 1,2,3,4,5 - - resultfields = best_kernel_f1:VARCHAR(50), best_kernel_accuracy:VARCHAR(50) - resultfields.timestamps = false - - logtables = train_scores:log_train_scores, test_f1:DOUBLE, test_accuracy:DOUBLE - log_train_scores = f1:DOUBLE, accuracy:DOUBLE, kernel:VARCHAR(50) - - [CUSTOM] - path = 
sample_data - """ - experiment_configuration = os.path.join('config', 'example_logtables.cfg') - with open(experiment_configuration, "w") as f: - f.write(content) + configuration_path = os.path.join('test', 'test_codecarbon', 'configs', 'test_config_sqlite.yml') - experimenter = PyExperimenter(experiment_configuration_file_path=experiment_configuration, name='example_notebook') + experimenter = PyExperimenter(experiment_configuration_file_path=configuration_path,) yield experimenter experimenter.delete_table() @@ -53,12 +30,11 @@ def test_delete_table_sqlite(experimenter_sqlite): with patch.object(DatabaseConnector, 'execute', return_value=None) as mock_execute: experimenter_sqlite.delete_table() - assert mock_execute.call_count == 5 - assert mock_execute.call_args_list[0][0][1] == 'DROP TABLE IF EXISTS example_logtables__train_scores' - assert mock_execute.call_args_list[1][0][1] == 'DROP TABLE IF EXISTS example_logtables__test_f1' - assert mock_execute.call_args_list[2][0][1] == 'DROP TABLE IF EXISTS example_logtables__test_accuracy' - assert mock_execute.call_args_list[3][0][1] == 'DROP TABLE IF EXISTS example_logtables_codecarbon' - assert mock_execute.call_args_list[4][0][1] == 'DROP TABLE IF EXISTS example_logtables' + assert mock_execute.call_count == 4 + assert mock_execute.call_args_list[0][0][1] == 'DROP TABLE IF EXISTS example_logtables__test_f1' + assert mock_execute.call_args_list[1][0][1] == 'DROP TABLE IF EXISTS example_logtables__test_accuracy' + assert mock_execute.call_args_list[2][0][1] == 'DROP TABLE IF EXISTS example_logtables_codecarbon' + assert mock_execute.call_args_list[3][0][1] == 'DROP TABLE IF EXISTS example_logtables' def test_get_table_sqlite(experimenter_sqlite): diff --git a/test/test_codecarbon/test_integration_mysql.py b/test/test_codecarbon/test_integration_mysql.py index a9cad282..b45bfec7 100644 --- a/test/test_codecarbon/test_integration_mysql.py +++ b/test/test_codecarbon/test_integration_mysql.py @@ -1,6 +1,5 @@ import 
random -import tempfile - +import os import numpy as np import pytest @@ -10,31 +9,10 @@ @pytest.fixture def experimenter(): - content = """ - [PY_EXPERIMENTER] - provider = mysql - database = py_experimenter - table = integration_test_mysql - - keyfields = dataset, cross_validation_splits:int, seed:int, kernel - dataset = iris - cross_validation_splits = 5 - seed = 2:6:2 - kernel = linear, poly, rbf, sigmoid - - resultfields = pipeline:LONGTEXT, train_f1:DECIMAL, train_accuracy:DECIMAL, test_f1:DECIMAL, test_accuracy:DECIMAL - resultfields.timestamps = false - - [CUSTOM] - path = sample_data - """ - - # Create temporary experiment configuration file - with tempfile.NamedTemporaryFile(mode='w', delete=False) as f: - f.write(content) - experiment_configuration = f.name - - return PyExperimenter(experiment_configuration) + # This module tests the MySQL provider, so it must load the MySQL configuration file. + configuration_path = os.path.join('test', 'test_codecarbon', 'configs', 'integration_test_mysql.yml') + + return PyExperimenter(configuration_path) def run_ml(parameters: dict, result_processor: ResultProcessor, custom_config: dict): diff --git a/test/test_codecarbon/test_integration_sqlite.py b/test/test_codecarbon/test_integration_sqlite.py index e680bc97..e23554c4 100644 --- a/test/test_codecarbon/test_integration_sqlite.py +++ b/test/test_codecarbon/test_integration_sqlite.py @@ -1,5 +1,5 @@ +import os import random -import tempfile import numpy as np import pytest @@ -10,31 +10,9 @@ @pytest.fixture def experimenter(): - content = """ - [PY_EXPERIMENTER] - provider = sqlite - database = py_experimenter - table = integration_test_mysql + configuration_path = os.path.join('test', 'test_codecarbon', 'configs', 'integration_test_sqlite.yml') - keyfields = dataset, cross_validation_splits:int, seed:int, kernel - dataset = iris - cross_validation_splits = 5 - seed = 2:6:2 - kernel = linear, poly, rbf, sigmoid - - resultfields = pipeline:LONGTEXT, train_f1:DECIMAL, train_accuracy:DECIMAL, test_f1:DECIMAL, test_accuracy:DECIMAL - 
resultfields.timestamps = false - - [CUSTOM] - path = sample_data - """ - - # Create temporary experiment configuration file - with tempfile.NamedTemporaryFile(mode='w', delete=False) as f: - f.write(content) - experiment_configuration = f.name - - return PyExperimenter(experiment_configuration) + return PyExperimenter(configuration_path) def run_ml(parameters: dict, result_processor: ResultProcessor, custom_config: dict):