refactoring

pull/27/head
nyanp 2020-02-04 23:44:07 +09:00
parent 5a6f553de6
commit 4468563cd6
11 changed files with 269 additions and 251 deletions


@ -28,8 +28,8 @@ $pip install nyaggle
## Examples
### Experiment Logging
`experiment_gbdt()` is an high-level API for cross validation using
gradient boosting algorithm. It outputs parameters, metrics, out of fold predictions, test predictions,
`run_experiment()` is a high-level API for experiments with cross validation.
It outputs parameters, metrics, out of fold predictions, test predictions,
feature importance and submission.csv under the specified directory.
It can be combined with mlflow tracking.
@ -48,10 +48,10 @@ params = {
'max_depth': 8
}
result = experiment(params,
X_train,
y_train,
X_test)
result = run_experiment(params,
X_train,
y_train,
X_test)
# You can get the outputs needed in data science competitions with 1 API call
@ -67,11 +67,11 @@ print(result.submission_df) # The output dataframe saved as submission.csv
# You can use it with mlflow and track your experiments through mlflow-ui
result = experiment(params,
X_train,
y_train,
X_test,
with_mlflow=True)
result = run_experiment(params,
X_train,
y_train,
X_test,
with_mlflow=True)
```


@ -2,7 +2,7 @@ import argparse
import pandas as pd
from sklearn.metrics import log_loss
from nyaggle.experiment import experiment
from nyaggle.experiment import run_experiment
if __name__ == "__main__":
@ -24,8 +24,8 @@ if __name__ == "__main__":
'task_type': 'GPU' if args.gpu else 'CPU'
}
result = experiment(cat_params, X_train, y_train, X_test, logging_directory='bnp-paribas-{time}',
eval_func=log_loss,
algorithm_type='cat',
sample_submission=pd.read_csv('sample_submission.csv'),
with_mlflow=True)
result = run_experiment(cat_params, X_train, y_train, X_test, logging_directory='bnp-paribas-{time}',
eval_func=log_loss,
algorithm_type='cat',
sample_submission=pd.read_csv('sample_submission.csv'),
with_mlflow=True)


@ -2,7 +2,7 @@ import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GroupKFold
from nyaggle.experiment import experiment
from nyaggle.experiment import run_experiment
from nyaggle.feature.category_encoder import TargetEncoder
lgb_params = {
@ -39,13 +39,13 @@ X_train, y_train = transform(te, X_train, y_train)
X_test, _ = transform(te, X_test, None)
# generated submission.csv scores 11.61445 in private LB (35th)
experiment(logging_directory='baseline_kaggledays_tokyo',
model_params=lgb_params,
X_train=X_train,
y=y_train,
X_test=X_test,
eval_func=mean_squared_error,
type_of_target='continuous',
overwrite=True,
with_auto_hpo=True,
sample_submission=pd.read_csv('sample_submission.csv'))
run_experiment(logging_directory='baseline_kaggledays_tokyo',
model_params=lgb_params,
X_train=X_train,
y=y_train,
X_test=X_test,
eval_func=mean_squared_error,
type_of_target='continuous',
overwrite=True,
with_auto_hpo=True,
sample_submission=pd.read_csv('sample_submission.csv'))


@ -1,7 +1,7 @@
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from nyaggle.experiment import experiment
from nyaggle.experiment import run_experiment
meta = pd.read_csv('training_set_metadata.csv')
@ -18,9 +18,9 @@ lgb_param_extra = {
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
result_extra = experiment(lgb_param_extra,
meta_extra.drop('target', axis=1),
meta_extra['target'],
logging_directory='plasticc-{time}',
cv=skf,
type_of_target='multiclass')
result_extra = run_experiment(lgb_param_extra,
meta_extra.drop('target', axis=1),
meta_extra['target'],
logging_directory='plasticc-{time}',
cv=skf,
type_of_target='multiclass')


@ -1,7 +1,7 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from nyaggle.experiment import experiment
from nyaggle.experiment import run_experiment
csv_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
@ -20,11 +20,11 @@ params = {
'reg_alpha': 0.1
}
result = experiment(params,
X_train,
y_train,
X_test,
result = run_experiment(params,
X_train,
y_train,
X_test,
'./wine-quality-{time}',
type_of_target='continuous',
with_mlflow=True,
with_auto_hpo=True)
type_of_target='continuous',
with_mlflow=True,
with_auto_hpo=True)


@ -1,3 +1,3 @@
from nyaggle.experiment.experiment import Experiment, add_leaderboard_score
from nyaggle.experiment.averaging import average_results
from nyaggle.experiment.gbdt import autoprep_gbdt, experiment, find_best_lgbm_parameter
from nyaggle.experiment.run import autoprep_gbdt, run_experiment, find_best_lgbm_parameter


@ -0,0 +1,63 @@
from typing import List, Optional, Tuple, Type, Union
import numpy as np
import pandas as pd
from catboost import CatBoost
from lightgbm import LGBMModel
from pandas.api.types import is_integer_dtype, is_categorical
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBModel
GBDTModel = Union[CatBoost, LGBMModel, XGBModel]
def autoprep_gbdt(model: Type[GBDTModel], X_train: pd.DataFrame, X_test: Optional[pd.DataFrame],
categorical_feature_to_treat: Optional[List[str]] = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
if categorical_feature_to_treat is None:
categorical_feature_to_treat = [c for c in X_train.columns if X_train[c].dtype.name in ['object', 'category']]
# LightGBM:
# Can handle categorical dtype. Otherwise, int, float or bool is acceptable for categorical columns.
# https://lightgbm.readthedocs.io/en/latest/Advanced-Topics.html#categorical-feature-support
#
# CatBoost:
# int, float, bool or str is acceptable for categorical columns. NaN should be filled.
# https://catboost.ai/docs/concepts/faq.html#why-float-and-nan-values-are-forbidden-for-cat-features
#
# XGBoost:
# All categorical columns should be encoded beforehand.
if issubclass(model, LGBMModel):
# LightGBM can handle categorical dtype natively
categorical_feature_to_treat = [c for c in categorical_feature_to_treat if not is_categorical(X_train[c])]
if issubclass(model, CatBoost) and len(categorical_feature_to_treat) > 0:
X_train = X_train.copy()
X_test = X_test.copy() if X_test is not None else X_train.iloc[:1, :].copy() # dummy
for c in categorical_feature_to_treat:
X_train[c], X_test[c] = _fill_na_by_unique_value(X_train[c], X_test[c])
if issubclass(model, (LGBMModel, XGBModel)) and len(categorical_feature_to_treat) > 0:
assert X_test is not None, "X_test is required for XGBoost with categorical variables"
X_train = X_train.copy()
X_test = X_test.copy()
for c in categorical_feature_to_treat:
X_train[c], X_test[c] = _fill_na_by_unique_value(X_train[c], X_test[c])
le = LabelEncoder()
concat = np.concatenate([X_train[c].values, X_test[c].values])
concat = le.fit_transform(concat)
X_train[c] = concat[:len(X_train)]
X_test[c] = concat[len(X_train):]
return X_train, X_test
def _fill_na_by_unique_value(strain: pd.Series, stest: Optional[pd.Series]) -> Tuple[pd.Series, pd.Series]:
if is_categorical(strain):
return strain.cat.codes, stest.cat.codes
elif is_integer_dtype(strain.dtype):
fillval = min(strain.min(), stest.min()) - 1
return strain.fillna(fillval), stest.fillna(fillval)
else:
return strain.astype(str), stest.astype(str)
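The comments above summarize how LightGBM, CatBoost, and XGBoost each treat categorical columns. As a rough illustration, here is a minimal sketch of calling `autoprep_gbdt` before training an XGBoost model; the toy DataFrames and column values are hypothetical, and only the `nyaggle.experiment.auto_prep` import path introduced in this PR is assumed.

```python
# Minimal usage sketch of autoprep_gbdt (toy data; column names are hypothetical).
import pandas as pd
from xgboost import XGBClassifier

from nyaggle.experiment.auto_prep import autoprep_gbdt

X_train = pd.DataFrame({'city': ['tokyo', 'osaka', None, 'tokyo'], 'age': [21, 35, 47, 29]})
X_test = pd.DataFrame({'city': ['osaka', None, 'nagoya'], 'age': [19, 50, 33]})

# For an XGBoost model, the 'city' column is NaN-filled and label-encoded consistently
# across train and test; the numeric 'age' column is left untouched.
X_train_prep, X_test_prep = autoprep_gbdt(XGBClassifier, X_train, X_test,
                                          categorical_feature_to_treat=['city'])

print(X_train_prep['city'].tolist())  # integer codes shared with X_test_prep
```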


@ -0,0 +1,87 @@
import copy
from typing import Dict, Iterable, Optional, Union
import pandas as pd
import optuna.integration.lightgbm as optuna_lgb
import sklearn.utils.multiclass as multiclass
from sklearn.model_selection import BaseCrossValidator
from nyaggle.validation.split import check_cv
def find_best_lgbm_parameter(base_param: Dict, X: pd.DataFrame, y: pd.Series,
cv: Optional[Union[int, Iterable, BaseCrossValidator]] = None,
groups: Optional[pd.Series] = None,
time_budget: Optional[int] = None,
type_of_target: str = 'auto') -> Dict:
"""
Search hyperparameters for LightGBM using optuna.
Args:
base_param:
Base parameters passed to lgb.train.
X:
Training data.
y:
Target
cv:
int, cross-validation generator or an iterable which determines the cross-validation splitting strategy.
groups:
Group labels for the samples. Only used in conjunction with a Group cv instance (e.g., ``GroupKFold``).
time_budget:
Time budget for tuning (in seconds).
type_of_target:
The type of target variable. If ``auto``, type is inferred by ``sklearn.utils.multiclass.type_of_target``.
Otherwise, ``binary``, ``continuous``, or ``multiclass`` are supported.
Returns:
The best parameters found
"""
cv = check_cv(cv, y)
if type_of_target == 'auto':
type_of_target = multiclass.type_of_target(y)
train_index, test_index = next(cv.split(X, y, groups))
dtrain = optuna_lgb.Dataset(X.iloc[train_index], y.iloc[train_index])
dvalid = optuna_lgb.Dataset(X.iloc[test_index], y.iloc[test_index])
params = copy.deepcopy(base_param)
if 'early_stopping_rounds' not in params:
params['early_stopping_rounds'] = 100
if not any([p in params for p in ('num_iterations', 'num_iteration',
'num_trees', 'num_tree',
'num_rounds', 'num_round')]):
params['num_iterations'] = params.get('n_estimators', 10000)
if 'objective' not in params:
tot_to_objective = {
'binary': 'binary',
'continuous': 'regression',
'multiclass': 'multiclass'
}
params['objective'] = tot_to_objective[type_of_target]
if 'metric' not in params and 'objective' in params:
if params['objective'] in ['regression', 'regression_l2', 'l2', 'mean_squared_error', 'mse', 'l2_root',
'root_mean_squared_error', 'rmse']:
params['metric'] = 'l2'
if params['objective'] in ['regression_l1', 'l1', 'mean_absolute_error', 'mae']:
params['metric'] = 'l1'
if params['objective'] in ['binary']:
params['metric'] = 'binary_logloss'
if params['objective'] in ['multiclass']:
params['metric'] = 'multi_logloss'
if not any([p in params for p in ('verbose', 'verbosity')]):
params['verbosity'] = -1
best_params, tuning_history = dict(), list()
optuna_lgb.train(params, dtrain, valid_sets=[dvalid], verbose_eval=0,
best_params=best_params, tuning_history=tuning_history, time_budget=time_budget)
result_param = copy.deepcopy(base_param)
result_param.update(best_params)
return result_param
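The docstring above spells out the tuner's inputs and return value. A brief sketch of how `find_best_lgbm_parameter` might be invoked follows; the dataset, base parameters, and time budget are hypothetical, and the `nyaggle.experiment.hyperparameter_tuner` import path is the one added in this PR.

```python
# Minimal usage sketch of find_best_lgbm_parameter (hypothetical data and parameters).
import pandas as pd
from sklearn.datasets import load_breast_cancer

from nyaggle.experiment.hyperparameter_tuner import find_best_lgbm_parameter

data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

base_param = {'learning_rate': 0.1, 'n_estimators': 1000}

# Runs optuna's stepwise LightGBM tuner on the first CV fold, stopping after ~10 minutes;
# the returned dict is base_param updated with the best values found.
best_param = find_best_lgbm_parameter(base_param, X, y, cv=5, time_budget=600)
print(best_param)
```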


@ -1,26 +1,24 @@
import copy
import os
import pickle
import time
from collections import namedtuple
from datetime import datetime
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Type, Union
import numpy as np
import optuna.integration.lightgbm as optuna_lgb
import pandas as pd
import sklearn.utils.multiclass as multiclass
from catboost import CatBoost, CatBoostClassifier, CatBoostRegressor
from lightgbm import LGBMModel, LGBMClassifier, LGBMRegressor
from xgboost import XGBModel, XGBClassifier, XGBRegressor
from more_itertools import first_true
from pandas.api.types import is_integer_dtype, is_categorical
from sklearn.base import BaseEstimator
from sklearn.model_selection import BaseCrossValidator
from sklearn.metrics import roc_auc_score, mean_squared_error, log_loss
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import BaseCrossValidator
from xgboost import XGBModel, XGBClassifier, XGBRegressor
from nyaggle.experiment.auto_prep import autoprep_gbdt
from nyaggle.experiment.experiment import Experiment
from nyaggle.experiment.hyperparameter_tuner import find_best_lgbm_parameter
from nyaggle.feature_store import load_features
from nyaggle.util import plot_importance
from nyaggle.validation.cross_validate import cross_validate
@ -39,104 +37,26 @@ ExperimentResult = namedtuple('LGBResult',
GBDTModel = Union[CatBoost, LGBMModel, XGBModel]
def find_best_lgbm_parameter(base_param: Dict, X: pd.DataFrame, y: pd.Series,
cv: Optional[Union[int, Iterable, BaseCrossValidator]] = None,
groups: Optional[pd.Series] = None,
time_budget: Optional[int] = None,
type_of_target: str = 'auto') -> Dict:
"""
Search hyperparameter for lightgbm using optuna.
Args:
base_param:
Base parameters passed to lgb.train.
X:
Training data.
y:
Target
cv:
int, cross-validation generator or an iterable which determines the cross-validation splitting strategy.
groups:
Group labels for the samples. Only used in conjunction with a Group cv instance (e.g., ``GroupKFold``).
time_budget:
Time budget for tuning (in seconds).
type_of_target:
The type of target variable. If ``auto``, type is inferred by ``sklearn.utils.multiclass.type_of_target``.
Otherwise, ``binary``, ``continuous``, or ``multiclass`` are supported.
Returns:
The best parameters found
"""
cv = check_cv(cv, y)
if type_of_target == 'auto':
type_of_target = multiclass.type_of_target(y)
train_index, test_index = next(cv.split(X, y, groups))
dtrain = optuna_lgb.Dataset(X.iloc[train_index], y.iloc[train_index])
dvalid = optuna_lgb.Dataset(X.iloc[test_index], y.iloc[test_index])
params = copy.deepcopy(base_param)
if 'early_stopping_rounds' not in params:
params['early_stopping_rounds'] = 100
if not any([p in params for p in ('num_iterations', 'num_iteration',
'num_trees', 'num_tree',
'num_rounds', 'num_round')]):
params['num_iterations'] = params.get('n_estimators', 10000)
if 'objective' not in params:
tot_to_objective = {
'binary': 'binary',
'continuous': 'regression',
'multiclass': 'multiclass'
}
params['objective'] = tot_to_objective[type_of_target]
if 'metric' not in params and 'objective' in params:
if params['objective'] in ['regression', 'regression_l2', 'l2', 'mean_squared_error', 'mse', 'l2_root',
'root_mean_squared_error', 'rmse']:
params['metric'] = 'l2'
if params['objective'] in ['regression_l1', 'l1', 'mean_absolute_error', 'mae']:
params['metric'] = 'l1'
if params['objective'] in ['binary']:
params['metric'] = 'binary_logloss'
if params['objective'] in ['multiclass']:
params['metric'] = 'multi_logloss'
if not any([p in params for p in ('verbose', 'verbosity')]):
params['verbosity'] = -1
best_params, tuning_history = dict(), list()
optuna_lgb.train(params, dtrain, valid_sets=[dvalid], verbose_eval=0,
best_params=best_params, tuning_history=tuning_history, time_budget=time_budget)
result_param = copy.deepcopy(base_param)
result_param.update(best_params)
return result_param
def experiment(model_params: Dict[str, Any],
X_train: pd.DataFrame, y: pd.Series,
X_test: Optional[pd.DataFrame] = None,
logging_directory: str = 'output/{time}',
overwrite: bool = False,
eval_func: Optional[Callable] = None,
algorithm_type: Union[str, Type[BaseEstimator]] = 'lgbm',
fit_params: Optional[Union[Dict[str, Any], Callable]] = None,
cv: Optional[Union[int, Iterable, BaseCrossValidator]] = None,
groups: Optional[pd.Series] = None,
categorical_feature: Optional[List[str]] = None,
sample_submission: Optional[pd.DataFrame] = None,
submission_filename: Optional[str] = None,
type_of_target: str = 'auto',
feature_list: Optional[List[Union[int, str]]] = None,
feature_directory: Optional[str] = None,
with_auto_hpo: bool = False,
with_auto_prep: bool = False,
with_mlflow: bool = False
):
def run_experiment(model_params: Dict[str, Any],
X_train: pd.DataFrame, y: pd.Series,
X_test: Optional[pd.DataFrame] = None,
logging_directory: str = 'output/{time}',
overwrite: bool = False,
eval_func: Optional[Callable] = None,
algorithm_type: Union[str, Type[BaseEstimator]] = 'lgbm',
fit_params: Optional[Union[Dict[str, Any], Callable]] = None,
cv: Optional[Union[int, Iterable, BaseCrossValidator]] = None,
groups: Optional[pd.Series] = None,
categorical_feature: Optional[List[str]] = None,
sample_submission: Optional[pd.DataFrame] = None,
submission_filename: Optional[str] = None,
type_of_target: str = 'auto',
feature_list: Optional[List[Union[int, str]]] = None,
feature_directory: Optional[str] = None,
with_auto_hpo: bool = False,
with_auto_prep: bool = False,
with_mlflow: bool = False
):
"""
Evaluate metrics by cross-validation and store the results
(log, oof prediction, test prediction, feature importance plot and submission file)
@ -410,55 +330,3 @@ def _check_input(X_train: pd.DataFrame, y: pd.Series,
if X_test is not None:
assert list(X_train.columns) == list(X_test.columns), "columns are different between X_train and X_test"
def _fill_na_by_unique_value(strain: pd.Series, stest: Optional[pd.Series]) -> Tuple[pd.Series, pd.Series]:
if is_categorical(strain):
return strain.cat.codes, stest.cat.codes
elif is_integer_dtype(strain.dtype):
fillval = min(strain.min(), stest.min()) - 1
return strain.fillna(fillval), stest.fillna(fillval)
else:
return strain.astype(str), stest.astype(str)
def autoprep_gbdt(model: Type[GBDTModel], X_train: pd.DataFrame, X_test: Optional[pd.DataFrame],
categorical_feature_to_treat: Optional[List[str]] = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
if categorical_feature_to_treat is None:
categorical_feature_to_treat = [c for c in X_train.columns if X_train[c].dtype.name in ['object', 'category']]
# LightGBM:
# Can handle categorical dtype. Otherwise, int, float or bool is acceptable for categorical columns.
# https://lightgbm.readthedocs.io/en/latest/Advanced-Topics.html#categorical-feature-support
#
# CatBoost:
# int, float, bool or str is acceptable for categorical columns. NaN should be filled.
# https://catboost.ai/docs/concepts/faq.html#why-float-and-nan-values-are-forbidden-for-cat-features
#
# XGBoost:
# All categorical column should be encoded beforehand.
if issubclass(model, LGBMModel):
# LightGBM can handle categorical dtype natively
categorical_feature_to_treat = [c for c in categorical_feature_to_treat if not is_categorical(X_train[c])]
if issubclass(model, CatBoost) and len(categorical_feature_to_treat) > 0:
X_train = X_train.copy()
X_test = X_test.copy() if X_test is not None else X_train.iloc[:1, :].copy() # dummy
for c in categorical_feature_to_treat:
X_train[c], X_test[c] = _fill_na_by_unique_value(X_train[c], X_test[c])
if issubclass(model, (LGBMModel, XGBModel)) and len(categorical_feature_to_treat) > 0:
assert X_test is not None, "X_test is required for XGBoost with categorical variables"
X_train = X_train.copy()
X_test = X_test.copy()
for c in categorical_feature_to_treat:
X_train[c], X_test[c] = _fill_na_by_unique_value(X_train[c], X_test[c])
le = LabelEncoder()
concat = np.concatenate([X_train[c].values, X_test[c].values])
concat = le.fit_transform(concat)
X_train[c] = concat[:len(X_train)]
X_test[c] = concat[len(X_train):]
return X_train, X_test
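Since `experiment()` is renamed to `run_experiment()` here with the same keyword arguments, a short sketch of a call using the new name follows; the dataset and parameter values are hypothetical.

```python
# Minimal sketch of the renamed API (hypothetical data; keyword names follow the
# run_experiment signature above).
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from nyaggle.experiment import run_experiment

data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

params = {'max_depth': 8, 'n_estimators': 300}

result = run_experiment(params, X_train, y_train, X_test,
                        logging_directory='output/{time}',
                        overwrite=True)

print(result.metrics[-1])           # overall CV score
print(result.oof_prediction.shape)  # out-of-fold predictions for X_train
```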


@ -4,7 +4,7 @@ import tempfile
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from nyaggle.experiment import average_results, experiment
from nyaggle.experiment import average_results, run_experiment
from nyaggle.testing import make_classification_df
@ -22,8 +22,8 @@ def test_averaging():
with tempfile.TemporaryDirectory() as temp_path:
for i in range(3):
params['seed'] = i
ret_single = experiment(params, X_train, y_train, X_test,
os.path.join(temp_path, 'seed{}'.format(i)))
ret_single = run_experiment(params, X_train, y_train, X_test,
os.path.join(temp_path, 'seed{}'.format(i)))
df = average_results([
os.path.join(temp_path, 'seed{}'.format(i)) for i in range(3)


@ -11,7 +11,7 @@ from sklearn.metrics import roc_auc_score, mean_squared_error, mean_absolute_err
from sklearn.model_selection import GroupKFold, KFold, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from nyaggle.experiment import experiment
from nyaggle.experiment import run_experiment
from nyaggle.feature_store import save_feature
from nyaggle.testing import make_classification_df, make_regression_df, get_temp_directory
@ -33,7 +33,7 @@ def test_experiment_lgb_classifier():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score)
result = run_experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score)
assert len(np.unique(result.oof_prediction)) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction)) > 5
@ -55,7 +55,7 @@ def test_experiment_lgb_regressor():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path)
result = run_experiment(params, X_train, y_train, X_test, temp_path)
assert len(np.unique(result.oof_prediction)) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction)) > 5
@ -76,7 +76,7 @@ def test_experiment_lgb_multiclass():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path)
result = run_experiment(params, X_train, y_train, X_test, temp_path)
assert len(np.unique(result.oof_prediction[:, 0])) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction[:, 0])) > 5
@ -98,8 +98,8 @@ def test_experiment_cat_classifier():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score, algorithm_type='cat',
submission_filename='submission.csv', with_auto_prep=True)
result = run_experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score, algorithm_type='cat',
submission_filename='submission.csv', with_auto_prep=True)
assert len(np.unique(result.oof_prediction)) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction)) > 5
@ -122,7 +122,7 @@ def test_experiment_cat_regressor():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='cat')
result = run_experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='cat')
assert mean_squared_error(y_train, result.oof_prediction) == result.metrics[-1]
_check_file_exists(temp_path, ('oof_prediction.npy', 'test_prediction.npy', 'metrics.txt'))
@ -140,8 +140,8 @@ def test_experiment_cat_multiclass():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='cat',
type_of_target='multiclass', submission_filename='submission.csv', with_auto_prep=True)
result = run_experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='cat',
type_of_target='multiclass', submission_filename='submission.csv', with_auto_prep=True)
assert result.oof_prediction.shape == (len(y_train), 5)
assert result.test_prediction.shape == (len(y_test), 5)
@ -163,8 +163,8 @@ def test_experiment_xgb_classifier():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score, algorithm_type='xgb',
submission_filename='submission.csv', with_auto_prep=True)
result = run_experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score, algorithm_type='xgb',
submission_filename='submission.csv', with_auto_prep=True)
assert len(np.unique(result.oof_prediction)) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction)) > 5
@ -187,7 +187,7 @@ def test_experiment_xgb_regressor():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='xgb', with_auto_prep=True)
result = run_experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='xgb', with_auto_prep=True)
assert mean_squared_error(y_train, result.oof_prediction) == result.metrics[-1]
_check_file_exists(temp_path, ('oof_prediction.npy', 'test_prediction.npy', 'metrics.txt'))
@ -205,9 +205,9 @@ def test_experiment_xgb_multiclass():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='xgb',
type_of_target='multiclass', submission_filename='submission.csv',
with_auto_prep=True)
result = run_experiment(params, X_train, y_train, X_test, temp_path, algorithm_type='xgb',
type_of_target='multiclass', submission_filename='submission.csv',
with_auto_prep=True)
assert result.oof_prediction.shape == (len(y_train), 5)
assert result.test_prediction.shape == (len(y_test), 5)
@ -228,8 +228,8 @@ def test_experiment_sklearn_classifier():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score,
algorithm_type=LogisticRegression, with_auto_prep=False)
result = run_experiment(params, X_train, y_train, X_test, temp_path, eval_func=roc_auc_score,
algorithm_type=LogisticRegression, with_auto_prep=False)
assert len(np.unique(result.oof_prediction)) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction)) > 5
@ -250,8 +250,8 @@ def test_experiment_sklearn_regressor():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, with_auto_prep=False,
algorithm_type=LinearRegression)
result = run_experiment(params, X_train, y_train, X_test, temp_path, with_auto_prep=False,
algorithm_type=LinearRegression)
assert len(np.unique(result.oof_prediction)) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction)) > 5
@ -271,8 +271,8 @@ def test_experiment_sklearn_multiclass():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, algorithm_type=KNeighborsClassifier,
with_auto_prep=False)
result = run_experiment(params, X_train, y_train, X_test, temp_path, algorithm_type=KNeighborsClassifier,
with_auto_prep=False)
assert len(np.unique(result.oof_prediction[:, 0])) > 5 # making sure prediction is not binarized
assert len(np.unique(result.test_prediction[:, 0])) > 5
@ -295,8 +295,8 @@ def test_experiment_cat_custom_eval():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path,
algorithm_type='cat', eval_func=mean_absolute_error)
result = run_experiment(params, X_train, y_train, X_test, temp_path,
algorithm_type='cat', eval_func=mean_absolute_error)
assert mean_absolute_error(y_train, result.oof_prediction) == result.metrics[-1]
_check_file_exists(temp_path, ('oof_prediction.npy', 'test_prediction.npy', 'metrics.txt'))
@ -314,7 +314,7 @@ def test_experiment_without_test_data():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, None, temp_path)
result = run_experiment(params, X_train, y_train, None, temp_path)
assert roc_auc_score(y_train, result.oof_prediction) >= 0.9
_check_file_exists(temp_path, ('oof_prediction.npy', 'metrics.txt'))
@ -333,11 +333,11 @@ def test_experiment_fit_params():
}
with get_temp_directory() as temp_path:
result1 = experiment(params, X_train, y_train, X_test,
temp_path, fit_params={'early_stopping_rounds': None})
result1 = run_experiment(params, X_train, y_train, X_test,
temp_path, fit_params={'early_stopping_rounds': None})
with get_temp_directory() as temp_path:
result2 = experiment(params, X_train, y_train, X_test,
temp_path, fit_params={'early_stopping_rounds': 5})
result2 = run_experiment(params, X_train, y_train, X_test,
temp_path, fit_params={'early_stopping_rounds': 5})
assert result1.models[-1].booster_.num_trees() == params['n_estimators']
assert result2.models[-1].booster_.num_trees() < params['n_estimators']
@ -355,7 +355,7 @@ def test_experiment_mlflow():
}
with get_temp_directory() as temp_path:
experiment(params, X_train, y_train, None, temp_path, with_mlflow=True)
run_experiment(params, X_train, y_train, None, temp_path, with_mlflow=True)
_check_file_exists(temp_path, ('oof_prediction.npy', 'metrics.txt', 'mlflow.json'))
@ -380,13 +380,13 @@ def test_experiment_already_exists():
}
with get_temp_directory() as temp_path:
experiment(params, X_train, y_train, None, temp_path, overwrite=True)
run_experiment(params, X_train, y_train, None, temp_path, overwrite=True)
# result is overwritten by default
experiment(params, X_train, y_train, None, temp_path, overwrite=True)
run_experiment(params, X_train, y_train, None, temp_path, overwrite=True)
with pytest.raises(Exception):
experiment(params, X_train, y_train, None, temp_path, overwrite=False)
run_experiment(params, X_train, y_train, None, temp_path, overwrite=False)
def test_submission_filename():
@ -401,7 +401,7 @@ def test_submission_filename():
}
with get_temp_directory() as temp_path:
experiment(params, X_train, y_train, X_test, temp_path, submission_filename='sub.csv')
run_experiment(params, X_train, y_train, X_test, temp_path, submission_filename='sub.csv')
df = pd.read_csv(os.path.join(temp_path, 'sub.csv'))
assert list(df.columns) == ['id', 'target']
@ -419,7 +419,7 @@ def test_experiment_manual_cv_kfold():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, None, temp_path, cv=KFold(4))
result = run_experiment(params, X_train, y_train, None, temp_path, cv=KFold(4))
assert len(result.models) == 4
assert len(result.metrics) == 4 + 1
@ -436,7 +436,7 @@ def test_experiment_manual_cv_int():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, None, temp_path, cv=KFold(2))
result = run_experiment(params, X_train, y_train, None, temp_path, cv=KFold(2))
assert len(result.models) == 2
assert len(result.metrics) == 2 + 1
@ -467,7 +467,7 @@ def test_experiment_manual_cv_group():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, cv=GroupKFold(2), groups=grp)
result = run_experiment(params, X_train, y_train, X_test, temp_path, cv=GroupKFold(2), groups=grp)
assert result.metrics[-1] < 0.7
@ -485,7 +485,7 @@ def test_experiment_sample_submission_binary():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, sample_submission=sample_df)
result = run_experiment(params, X_train, y_train, X_test, temp_path, sample_submission=sample_df)
assert list(result.submission_df.columns) == ['target_id_abc', 'target_value_abc']
assert roc_auc_score(y_test, result.submission_df['target_value_abc']) > 0.8
@ -506,7 +506,7 @@ def test_experiment_sample_submission_multiclass():
}
with get_temp_directory() as temp_path:
result = experiment(params, X_train, y_train, X_test, temp_path, sample_submission=sample_df)
result = run_experiment(params, X_train, y_train, X_test, temp_path, sample_submission=sample_df)
assert list(result.submission_df.columns) == ['target_id_abc',
'target_class_0',
@ -539,11 +539,11 @@ def test_with_feature_attachment():
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)
with get_temp_directory() as temp_path:
result_wo_feature = experiment(params, X_train, y_train, X_test, logging_directory=temp_path)
result_wo_feature = run_experiment(params, X_train, y_train, X_test, logging_directory=temp_path)
with get_temp_directory() as temp_path:
result_w_feature = experiment(params, X_train, y_train, X_test, logging_directory=temp_path,
feature_list=[0, 1, 2, 3], feature_directory=temp_feature_path)
result_w_feature = run_experiment(params, X_train, y_train, X_test, logging_directory=temp_path,
feature_list=[0, 1, 2, 3], feature_directory=temp_feature_path)
assert result_w_feature.metrics[-1] > result_wo_feature.metrics[-1]
@ -560,8 +560,8 @@ def test_with_long_params():
with get_temp_directory() as temp_path:
# just to make sure the experiment finishes
experiment(params, X_train, y_train, X_test,
logging_directory=temp_path, with_mlflow=True)
run_experiment(params, X_train, y_train, X_test,
logging_directory=temp_path, with_mlflow=True)
def test_with_rare_categories():
@ -602,9 +602,9 @@ def test_with_rare_categories():
for algorithm in ('cat', 'xgb', 'lgbm'):
with get_temp_directory() as temp_path:
experiment(params[algorithm], X_train, y_train, X_test, algorithm_type=algorithm,
logging_directory=temp_path, with_mlflow=True, with_auto_prep=True,
categorical_feature=['x0', 'x1', 'x2', 'x3'])
run_experiment(params[algorithm], X_train, y_train, X_test, algorithm_type=algorithm,
logging_directory=temp_path, with_mlflow=True, with_auto_prep=True,
categorical_feature=['x0', 'x1', 'x2', 'x3'])
def test_inherit_outer_scope_run():
@ -618,7 +618,7 @@ def test_inherit_outer_scope_run():
X, y = make_classification_df()
with get_temp_directory() as temp_path:
experiment(params, X, y, with_mlflow=True, logging_directory=temp_path)
run_experiment(params, X, y, with_mlflow=True, logging_directory=temp_path)
assert mlflow.active_run() is not None # still valid