Merge pull request #50 from nyanp/feature/custom-experiment
add inherit_experiment parameter to run_experiment
pull/52/head
commit
df7a6a35ec
|
@ -53,3 +53,24 @@ If you are familiar with mlflow tracking, you may notice that these APIs are sim
|
|||
# logging as you want, and you can see the result in mlflow ui
|
||||
...
|
||||
|
||||
|
||||
|
||||
Log extra parameters to run_experiment
|
||||
---------------------------------------
|
||||
|
||||
By using the ``inherit_experiment`` parameter, you can mix any additional logging with the results that ``run_experiment`` creates.
|
||||
In the following example, nyaggle records the result of ``run_experiment`` under the same experiment as
|
||||
the parameter and metric logged outside of the function.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from nyaggle.experiment import Experiment, run_experiment
|
||||
|
||||
with Experiment(logging_directory='./output/') as exp:
|
||||
|
||||
exp.log_param('my extra param', 'bar')
|
||||
|
||||
run_experiment(..., inherit_experiment=exp)
|
||||
|
||||
exp.log_metric('my extra metrics', 0.999)
|
||||
|
||||
|
|
|
@ -33,6 +33,25 @@ ExperimentResult = namedtuple('ExperimentResult',
|
|||
])
|
||||
|
||||
|
||||
class ExpeimentProxy(object):
    """Transparent stand-in for an experiment object owned by the caller.

    Attribute reads, writes and deletions are forwarded to the wrapped object,
    so the proxy exposes the same attributes and methods as that object.
    Used as a context manager, entering returns the proxy itself and leaving
    does nothing: the wrapped object's own ``__enter__``/``__exit__`` are not
    invoked through the proxy, so its lifecycle stays with whoever created it.

    Args:
        obj:
            The object that attribute access is forwarded to.
    """

    # Only the wrapped object is stored on the proxy itself.
    __slots__ = ["_obj", "__weakref__"]

    def __init__(self, obj):
        # ``__setattr__`` is overridden to forward to the wrapped object,
        # so the slot has to be filled through ``object`` directly.
        object.__setattr__(self, "_obj", obj)

    def __getattribute__(self, name):
        """Return attribute ``name`` of the wrapped object."""
        return getattr(object.__getattribute__(self, "_obj"), name)

    def __setattr__(self, name, value):
        """Set attribute ``name`` on the wrapped object."""
        setattr(object.__getattribute__(self, "_obj"), name, value)

    def __delattr__(self, name):
        """Delete attribute ``name`` from the wrapped object."""
        delattr(object.__getattribute__(self, "_obj"), name)

    def __enter__(self):
        """Return the proxy; the wrapped object is not (re-)entered."""
        return self

    def __exit__(self, ex_type, ex_value, trace):
        """Do nothing on exit; returns ``None`` so exceptions propagate."""
        pass


# Correctly spelled name; ``ExpeimentProxy`` is kept for existing callers.
ExperimentProxy = ExpeimentProxy
|
||||
|
||||
|
||||
def run_experiment(model_params: Dict[str, Any],
|
||||
X_train: pd.DataFrame, y: pd.Series,
|
||||
X_test: Optional[pd.DataFrame] = None,
|
||||
|
@ -49,6 +68,7 @@ def run_experiment(model_params: Dict[str, Any],
|
|||
type_of_target: str = 'auto',
|
||||
feature_list: Optional[List[Union[int, str]]] = None,
|
||||
feature_directory: Optional[str] = None,
|
||||
inherit_experiment: Optional[Experiment] = None,
|
||||
with_auto_hpo: bool = False,
|
||||
with_auto_prep: bool = False,
|
||||
with_mlflow: bool = False
|
||||
|
@ -132,6 +152,9 @@ def run_experiment(model_params: Dict[str, Any],
|
|||
The list of feature ids saved through nyaggle.feature_store module.
|
||||
feature_directory:
|
||||
The location of features stored. Only used if feature_list is not empty.
|
||||
inherit_experiment:
|
||||
An experiment object which is used to log results. If not ``None``, all logs in this function are treated
|
||||
as a part of this experiment.
|
||||
with_auto_prep:
|
||||
If True, the input datasets will be copied and automatic preprocessing will be performed on them.
|
||||
For example, if ``gbdt_type = 'cat'``, all missing values in categorical features will be filled.
|
||||
|
@ -186,9 +209,14 @@ def run_experiment(model_params: Dict[str, Any],
|
|||
|
||||
logging_directory = logging_directory.format(time=datetime.now().strftime('%Y%m%d_%H%M%S'))
|
||||
|
||||
with Experiment(logging_directory, if_exists=if_exists, with_mlflow=with_mlflow) as exp:
|
||||
if inherit_experiment is not None:
|
||||
experiment = ExpeimentProxy(inherit_experiment)
|
||||
else:
|
||||
experiment = Experiment(logging_directory, if_exists=if_exists, with_mlflow=with_mlflow)
|
||||
|
||||
with experiment as exp:
|
||||
exp.log('Algorithm: {}'.format(algorithm_type))
|
||||
exp.log('Experiment: {}'.format(logging_directory))
|
||||
exp.log('Experiment: {}'.format(exp.logging_directory))
|
||||
exp.log('Params: {}'.format(model_params))
|
||||
exp.log('Features: {}'.format(list(X_train.columns)))
|
||||
exp.log_param('algorithm_type', algorithm_type)
|
||||
|
@ -230,19 +258,19 @@ def run_experiment(model_params: Dict[str, Any],
|
|||
# save importance plot
|
||||
if result.importance:
|
||||
importance = pd.concat(result.importance)
|
||||
plot_file_path = os.path.join(logging_directory, 'importance.png')
|
||||
plot_file_path = os.path.join(exp.logging_directory, 'importance.png')
|
||||
plot_importance(importance, plot_file_path)
|
||||
exp.log_artifact(plot_file_path)
|
||||
|
||||
# save trained model
|
||||
for i, model in enumerate(models):
|
||||
_save_model(model, logging_directory, i + 1, exp)
|
||||
_save_model(model, exp.logging_directory, i + 1, exp)
|
||||
|
||||
# save submission.csv
|
||||
submit_df = None
|
||||
if X_test is not None:
|
||||
submit_df = _make_submission_df(result.test_prediction, type_of_target, y, sample_submission)
|
||||
exp.log_dataframe(submission_filename or os.path.basename(logging_directory), submit_df, 'csv')
|
||||
exp.log_dataframe(submission_filename or os.path.basename(exp.logging_directory), submit_df, 'csv')
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ from sklearn.metrics import roc_auc_score, mean_squared_error, mean_absolute_err
|
|||
from sklearn.model_selection import GroupKFold, KFold, train_test_split
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
|
||||
from nyaggle.experiment import run_experiment
|
||||
from nyaggle.experiment import Experiment, run_experiment
|
||||
from nyaggle.feature_store import save_feature
|
||||
from nyaggle.testing import make_classification_df, make_regression_df, get_temp_directory
|
||||
|
||||
|
@ -634,3 +634,18 @@ def test_inherit_outer_scope_run():
|
|||
assert data.metrics['Overall'] > 0 # recorded
|
||||
|
||||
mlflow.end_run()
|
||||
|
||||
|
||||
def test_custom_experiment():
    """Results of ``run_experiment`` go to the inherited experiment's directory."""
    features, target = make_classification_df()
    model_params = dict(objective='binary', max_depth=8)

    with get_temp_directory() as temp_path:
        outer = Experiment(temp_path, with_mlflow=True)
        with outer as e:
            run_experiment(model_params, features, target,
                           logging_directory='foobar', inherit_experiment=e)

        # everything must land in e.logging_directory rather than in 'foobar'
        _check_file_exists(temp_path, with_mlflow=True)
|
||||
|
|
Loading…
Reference in New Issue