Merge pull request #50 from nyanp/feature/custom-experiment

add inherit_experiment parameter to run_experiment
2020-02-21 23:27:45 +09:00 · 2020-02-21 23:27:45 +09:00 · df7a6a35ec
parent e5995d9c8e c410072c21
commit df7a6a35ec
3 changed files with 70 additions and 6 deletions
--- a/docs/source/tutorial/experiment_advanced.rst
+++ b/docs/source/tutorial/experiment_advanced.rst
@ -53,3 +53,24 @@ If you are familiar with mlflow tracking, you may notice that these APIs are sim
      # logging as you want, and you can see the result in mlflow ui
      ...

+
+
+Log extra parameters to run_experiment
+---------------------------------------
+
+By using the ``inherit_experiment`` parameter, you can mix any additional logging with the results that ``run_experiment`` creates.
+In the following example, nyaggle records the result of ``run_experiment`` under the same experiment as
+the parameters and metrics logged outside of the function.
+
+.. code-block:: python
+
+  from nyaggle.experiment import Experiment, run_experiment
+
+  with Experiment(logging_directory='./output/') as exp:
+
+      exp.log_param('my extra param', 'bar')
+
+      run_experiment(..., inherit_experiment=exp)
+
+      exp.log_metrics('my extra metrics', 0.999)
+
--- a/nyaggle/experiment/run.py
+++ b/nyaggle/experiment/run.py
@ -33,6 +33,25 @@ ExperimentResult = namedtuple('ExperimentResult',
                              ])


+class ExpeimentProxy(object):
+    __slots__ = ["_obj", "__weakref__"]
+
+    def __init__(self, obj):
+        object.__setattr__(self, "_obj", obj)
+
+    def __getattribute__(self, name):
+        return getattr(object.__getattribute__(self, "_obj"), name)
+
+    def __setattr__(self, name, value):
+        setattr(object.__getattribute__(self, "_obj"), name, value)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, ex_type, ex_value, trace):
+        pass
+
+
 def run_experiment(model_params: Dict[str, Any],
                   X_train: pd.DataFrame, y: pd.Series,
                   X_test: Optional[pd.DataFrame] = None,
@ -49,6 +68,7 @@ def run_experiment(model_params: Dict[str, Any],
                   type_of_target: str = 'auto',
                   feature_list: Optional[List[Union[int, str]]] = None,
                   feature_directory: Optional[str] = None,
+                   inherit_experiment: Optional[Experiment] = None,
                   with_auto_hpo: bool = False,
                   with_auto_prep: bool = False,
                   with_mlflow: bool = False
@ -132,6 +152,9 @@ def run_experiment(model_params: Dict[str, Any],
            The list of feature ids saved through nyaggle.feature_store module.
        feature_directory:
            The location of features stored. Only used if feature_list is not empty.
+        inherit_experiment:
+            An experiment object which is used to log results. If not ``None``, all logs in this function are treated
+            as a part of this experiment.
        with_auto_prep:
            If True, the input datasets will be copied and automatic preprocessing will be performed on them.
            For example, if ``gbdt_type = 'cat'``, all missing values in categorical features will be filled.
@ -186,9 +209,14 @@ def run_experiment(model_params: Dict[str, Any],

    logging_directory = logging_directory.format(time=datetime.now().strftime('%Y%m%d_%H%M%S'))

-    with Experiment(logging_directory, if_exists=if_exists, with_mlflow=with_mlflow) as exp:
+    if inherit_experiment is not None:
+        experiment = ExpeimentProxy(inherit_experiment)
+    else:
+        experiment = Experiment(logging_directory, if_exists=if_exists, with_mlflow=with_mlflow)
+
+    with experiment as exp:
        exp.log('Algorithm: {}'.format(algorithm_type))
-        exp.log('Experiment: {}'.format(logging_directory))
+        exp.log('Experiment: {}'.format(exp.logging_directory))
        exp.log('Params: {}'.format(model_params))
        exp.log('Features: {}'.format(list(X_train.columns)))
        exp.log_param('algorithm_type', algorithm_type)
@ -230,19 +258,19 @@ def run_experiment(model_params: Dict[str, Any],
        # save importance plot
        if result.importance:
            importance = pd.concat(result.importance)
-            plot_file_path = os.path.join(logging_directory, 'importance.png')
+            plot_file_path = os.path.join(exp.logging_directory, 'importance.png')
            plot_importance(importance, plot_file_path)
            exp.log_artifact(plot_file_path)

        # save trained model
        for i, model in enumerate(models):
-            _save_model(model, logging_directory, i + 1, exp)
+            _save_model(model, exp.logging_directory, i + 1, exp)

        # save submission.csv
        submit_df = None
        if X_test is not None:
            submit_df = _make_submission_df(result.test_prediction, type_of_target, y, sample_submission)
-            exp.log_dataframe(submission_filename or os.path.basename(logging_directory), submit_df, 'csv')
+            exp.log_dataframe(submission_filename or os.path.basename(exp.logging_directory), submit_df, 'csv')

        elapsed_time = time.time() - start_time

--- a/tests/experiment/test_run.py
+++ b/tests/experiment/test_run.py
@ -11,7 +11,7 @@ from sklearn.metrics import roc_auc_score, mean_squared_error, mean_absolute_err
 from sklearn.model_selection import GroupKFold, KFold, train_test_split
 from sklearn.neighbors import KNeighborsClassifier

-from nyaggle.experiment import run_experiment
+from nyaggle.experiment import Experiment, run_experiment
 from nyaggle.feature_store import save_feature
 from nyaggle.testing import make_classification_df, make_regression_df, get_temp_directory

@ -634,3 +634,18 @@ def test_inherit_outer_scope_run():
    assert data.metrics['Overall'] > 0  # recorded

    mlflow.end_run()
+
+
+def test_custom_experiment():
+    params = {
+        'objective': 'binary',
+        'max_depth': 8
+    }
+    X, y = make_classification_df()
+
+    with get_temp_directory() as temp_path:
+        with Experiment(temp_path, with_mlflow=True) as e:
+            run_experiment(params, X, y, logging_directory='foobar', inherit_experiment=e)
+
+        # all files are logged into e.logging_directory, instead of 'foobar'
+        _check_file_exists(temp_path, with_mlflow=True)