fix error thrown in logging when fit_params is callable

pull/74/head
nyanp 2020-05-26 23:36:04 +09:00
parent 04138a252b
commit 0d03e3cbcf
3 changed files with 38 additions and 3 deletions

@@ -221,7 +221,10 @@ def run_experiment(model_params: Dict[str, Any],
     exp.log('Features: {}'.format(list(X_train.columns)))
     exp.log_param('algorithm_type', algorithm_type)
     exp.log_param('num_features', X_train.shape[1])
-    exp.log_dict('fit_params', fit_params)
+    if callable(fit_params):
+        exp.log_param('fit_params', str(fit_params))
+    else:
+        exp.log_dict('fit_params', fit_params)
     exp.log_dict('model_params', model_params)
     if feature_list is not None:
         exp.log_param('features', feature_list)
@@ -241,7 +244,8 @@ def run_experiment(model_params: Dict[str, Any],
     if cat_param_name is not None and not callable(fit_params) and cat_param_name not in fit_params:
         fit_params[cat_param_name] = categorical_feature
-    exp.log_params(fit_params)
+    if isinstance(fit_params, Dict):
+        exp.log_params(fit_params)
 
     result = cross_validate(models, X_train=X_train, y=y, X_test=X_test, cv=cv, groups=groups,
                             logger=exp.get_logger(), eval_func=eval_func, fit_params=fit_params,
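
For context on the failure the first hunk addresses: a dict-logging helper iterates key/value pairs, and a function object has none, so passing a callable fit_params into exp.log_dict raised. A minimal sketch of the pattern, assuming log_dict walks .items() the way nyaggle's Experiment helper appears to; the names below are illustrative stand-ins, not nyaggle's actual API.

from typing import Any, Callable, Dict, Union

def log_param(key: str, value: Any) -> None:
    # Log a single scalar parameter.
    print(f'{key}: {value}')

def log_dict(key: str, value: Dict[str, Any]) -> None:
    # Log each entry of a dict; raises AttributeError for a callable,
    # which is the bug this commit works around.
    for k, v in value.items():
        print(f'{key}.{k}: {v}')

def log_fit_params(fit_params: Union[Dict[str, Any], Callable]) -> None:
    # The commit's fix: fall back to the callable's repr.
    if callable(fit_params):
        log_param('fit_params', str(fit_params))
    else:
        log_dict('fit_params', fit_params)

log_fit_params({'early_stopping_rounds': 100})           # fit_params.early_stopping_rounds: 100
log_fit_params(lambda n, tr, va: {'sample_weight': []})  # fit_params: <function <lambda> ...>

With the branch in place, a dict is still logged key by key, while a callable degrades gracefully to its repr.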

@@ -1,5 +1,6 @@
 import json
 import os
+from typing import List
 from urllib.parse import urlparse, unquote
 
 import mlflow
@@ -333,6 +334,37 @@ def test_experiment_fit_params(tmpdir_name):
     assert result2.models[-1].booster_.num_trees() < params['n_estimators']
 
 
+def test_experiment_fit_params_callback(tmpdir_name):
+    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
+                                  class_sep=0.98, random_state=0, id_column='user_id')
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
+
+    params = {
+        'objective': 'binary',
+        'max_depth': 8,
+        'n_estimators': 500
+    }
+
+    sample_weights = np.random.randint(1, 10, size=len(X_train))
+    sample_weights = sample_weights / sample_weights.sum()
+
+    def fit_params(n: int, train_index: List[int], valid_index: List[int]):
+        return {
+            'early_stopping_rounds': 100,
+            'sample_weight': list(sample_weights[train_index]),
+            'eval_sample_weight': [list(sample_weights[valid_index])]
+        }
+
+    result1 = run_experiment(params, X_train, y_train, X_test,
+                             os.path.join(tmpdir_name, '1'), fit_params=fit_params)
+
+    result2 = run_experiment(params, X_train, y_train, X_test,
+                             os.path.join(tmpdir_name, '2'))
+
+    assert result1.metrics[-1] != result2.metrics[-1]
+
+
 def test_experiment_mlflow(tmpdir_name):
     X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                                   class_sep=0.98, random_state=0, id_column='user_id')
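
The test's fit_params(n, train_index, valid_index) signature implies the callable is evaluated once per fold, so each fold can receive its own sample-weight slice; that is why result1 (per-fold weights) and result2 (defaults) end with different metrics. A minimal sketch of that per-fold dispatch, under the assumption that cross_validate resolves the parameters this way; resolve_fit_params is a hypothetical helper, not nyaggle's internals.

from typing import Any, Callable, Dict, List, Union

FitParams = Union[Dict[str, Any],
                  Callable[[int, List[int], List[int]], Dict[str, Any]]]

def resolve_fit_params(fit_params: FitParams, fold: int,
                       train_index: List[int],
                       valid_index: List[int]) -> Dict[str, Any]:
    # A callable yields per-fold kwargs (e.g. weights sliced to this fold);
    # a plain dict is shared unchanged across all folds.
    if callable(fit_params):
        return fit_params(fold, train_index, valid_index)
    return fit_params

per_fold = resolve_fit_params(lambda n, tr, va: {'sample_weight': tr}, 0, [0, 1], [2])
assert per_fold == {'sample_weight': [0, 1]}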

@@ -149,7 +149,6 @@ def test_fit_params_callback():
     result_w_weight = cross_validate(models, X_train, y_train, X_test, cv=5,
                                      eval_func=roc_auc_score, fit_params=fit_params)
 
     result_wo_weight = cross_validate(models, X_train, y_train, X_test, cv=5,
                                       eval_func=roc_auc_score, fit_params={'early_stopping_rounds': 50})