reformat files

2020-05-18 23:04:03 +09:00 · 2020-05-18 23:04:03 +09:00 · 345f1b3746
parent 6f64d1dd25
commit 345f1b3746
18 changed files with 50 additions and 53 deletions
--- a/nyaggle/ensemble/averaging.py
+++ b/nyaggle/ensemble/averaging.py
@@ -1,5 +1,4 @@
-from collections import namedtuple
-from typing import Callable, Iterable, List, Union, Optional, Tuple
+from typing import Callable, List, Optional, Tuple

 import numpy as np
 import pandas as pd
@@ -105,6 +104,7 @@ def averaging_opt(test_predictions: List[np.ndarray],
        * score:
            float, Calculated score on Out-of-Fold data. ``None`` if ``eval_func`` is ``None``.
    """
+
    def _minimize(weights):
        prediction = np.zeros_like(oof_predictions[0])
        for weight, oof in zip(weights, oof_predictions):
--- a/nyaggle/ensemble/stacking.py
+++ b/nyaggle/ensemble/stacking.py
@@ -94,4 +94,3 @@ def stacking(test_predictions: List[np.ndarray],
    score = result.scores[-1] if result.scores else None

    return EnsembleResult(result.test_prediction, result.oof_prediction, score)
-
--- a/nyaggle/experiment/auto_prep.py
+++ b/nyaggle/experiment/auto_prep.py
@@ -56,4 +56,3 @@ def _fill_na_by_unique_value(strain: pd.Series, stest: Optional[pd.Series]) -> T
        return strain.fillna(fillval), stest.fillna(fillval)
    else:
        return strain.astype(str), stest.astype(str)
-
--- a/nyaggle/experiment/hyperparameter_tuner.py
+++ b/nyaggle/experiment/hyperparameter_tuner.py
@@ -1,8 +1,8 @@
 import copy
 from typing import Dict, Iterable, Optional, Union

-import pandas as pd
 import optuna.integration.lightgbm as optuna_lgb
+import pandas as pd
 import sklearn.utils.multiclass as multiclass
 from sklearn.model_selection import BaseCrossValidator

--- a/nyaggle/experiment/run.py
+++ b/nyaggle/experiment/run.py
@@ -5,7 +5,6 @@ from collections import namedtuple
 from datetime import datetime
 from typing import Any, Callable, Dict, Iterable, List, Optional, Type, Union

-import numpy as np
 import pandas as pd
 import sklearn.utils.multiclass as multiclass
 from sklearn.base import BaseEstimator
--- a/nyaggle/feature/category_encoder/target_encoder.py
+++ b/nyaggle/feature/category_encoder/target_encoder.py
@@ -33,6 +33,7 @@ class KFoldEncoderWrapper(BaseFeaturizer):
            If True, `transform` and `fit_transform` return the same type as X.
            If False, these APIs always return a numpy array, similar to sklearn's API.
    """
+
    def __init__(self, base_transformer: BaseEstimator,
                 cv: Optional[Union[int, Iterable, BaseCrossValidator]] = None, return_same_type: bool = True,
                 groups: Optional[pd.Series] = None):
@@ -168,6 +169,7 @@ class TargetEncoder(KFoldEncoderWrapper):
            If True, ``transform`` and ``fit_transform`` return the same type as X.
            If False, these APIs always return a numpy array, similar to sklearn's API.
    """
+
    def __init__(self, cv: Optional[Union[Iterable, BaseCrossValidator]] = None,
                 groups: Optional[pd.Series] = None,
                 cols: List[str] = None,
--- a/nyaggle/feature_store/feature_store.py
+++ b/nyaggle/feature_store/feature_store.py
@@ -1,10 +1,10 @@
 import functools
 import os
-import pyarrow
 import warnings
 from typing import List, Optional, Union

 import pandas as pd
+import pyarrow
 from tqdm import tqdm


@@ -171,6 +171,7 @@ def cached_feature(feature_name: Union[int, str], directory: str = './features/'
        "called"
        >>> x = make_feature_x(...)  # load from file in the second time
    """
+
    def _decorator(fun):
        @functools.wraps(fun)
        def _decorated_fun(*args, **kwargs):
--- a/nyaggle/util/plot_importance.py
+++ b/nyaggle/util/plot_importance.py
@@ -1,11 +1,11 @@
 from typing import Optional, Tuple

-import pandas as pd
 import matplotlib.pyplot as plt
+import pandas as pd
 import seaborn as sns


-def plot_importance(importance: pd.DataFrame, path: Optional[str] = None, top_n: int = 100, 
+def plot_importance(importance: pd.DataFrame, path: Optional[str] = None, top_n: int = 100,
                    figsize: Optional[Tuple[int, int]] = None,
                    title: Optional[str] = None):
    """
@@ -37,9 +37,9 @@ def plot_importance(importance: pd.DataFrame, path: Optional[str] = None, top_n:
        >>> })
        >>> plot_importance(importance, 'importance.png')
    """
-    importance = importance.groupby('feature')['importance']\
-        .mean()\
-        .reset_index()\
+    importance = importance.groupby('feature')['importance'] \
+        .mean() \
+        .reset_index() \
        .sort_values(by='importance', ascending=False)

    if len(importance) > top_n:
--- a/nyaggle/validation/adversarial_validate.py
+++ b/nyaggle/validation/adversarial_validate.py
@@ -19,8 +19,8 @@ def adversarial_validate(X_train: pd.DataFrame,
                         X_test: pd.DataFrame,
                         importance_type: str = 'gain',
                         estimator: Optional[BaseEstimator] = None,
-                         cat_cols = None,
-                         cv = None) -> ADVResult:
+                         cat_cols=None,
+                         cv=None) -> ADVResult:
    """
    Perform adversarial validation between X_train and X_test.

@@ -63,7 +63,7 @@ def adversarial_validate(X_train: pd.DataFrame,
        col_9   170.6438643
    """
    concat = pd.concat([X_train, X_test]).copy().reset_index(drop=True)
-    y = np.array([1]*len(X_train) + [0]*len(X_test))
+    y = np.array([1] * len(X_train) + [0] * len(X_test))

    if estimator is None:
        requires_lightgbm()
--- a/nyaggle/validation/cross_validate.py
+++ b/nyaggle/validation/cross_validate.py
@@ -11,10 +11,10 @@ import sklearn.utils.multiclass as multiclass
 from category_encoders.utils import convert_input, convert_input_vector
 from sklearn.base import BaseEstimator
 from sklearn.model_selection import BaseCrossValidator
+
 from nyaggle.util.traits import is_gbdt_instance
 from nyaggle.validation.split import check_cv

-
 CVResult = namedtuple('CVResult', ['oof_prediction', 'test_prediction', 'scores', 'importance'])


--- a/nyaggle/validation/split.py
+++ b/nyaggle/validation/split.py
@@ -70,6 +70,7 @@ class Take(BaseCrossValidator):
        >>> folds.get_n_splits()
        3
    """
+
    def __init__(self, n: int, base_validator: BaseCrossValidator):
        self.base_validator = base_validator
        self.n = n
@@ -118,6 +119,7 @@ class Skip(BaseCrossValidator):
        >>> folds.get_n_splits()
        2
    """
+
    def __init__(self, n: int, base_validator: BaseCrossValidator):
        self.base_validator = base_validator
        self.n = n
@@ -156,9 +158,10 @@ class Nth(BaseCrossValidator):
        >>> folds.get_n_splits()
        1
    """
+
    def __init__(self, n: int, base_validator: BaseCrossValidator):
        assert n > 0, "n is 1-origin and should be greater than 0"
-        self.base_validator = Take(1, Skip(n-1, base_validator))
+        self.base_validator = Take(1, Skip(n - 1, base_validator))
        self.n = n

    def get_n_splits(self, X=None, y=None, groups=None):
@@ -245,7 +248,8 @@ class TimeSeriesSplit(BaseCrossValidator):
        assert train_interval[1], "train_interval[1] should not be None"
        assert test_interval[0], "test_interval[0] should not be None"

-        assert (not train_interval[0]) or (train_interval[0] <= train_interval[1]), "train_interval[0] < train_interval[1]"
+        assert (not train_interval[0]) or (
+                    train_interval[0] <= train_interval[1]), "train_interval[0] < train_interval[1]"
        assert (not test_interval[1]) or (test_interval[0] <= test_interval[1]), "test_interval[0] < test_interval[1]"

        self.times.append((train_interval, test_interval))
@@ -440,7 +444,7 @@ class StratifiedGroupKFold(_BaseKFold):
            else:
                freq_loss = freq_loss / denom
            losses = ratio_loss + freq_loss
-            #-------
+            # -------
            splitx = np.argmin(losses)
            split_freq[splitx] = cand_freq[splitx]
            split_ratios[splitx] = cand_ratio[splitx]
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,8 @@
-from setuptools import find_packages, setup
 from codecs import open
 from os import path

+from setuptools import find_packages, setup
+

 def get_long_description():
    here = path.abspath(path.dirname(__file__))
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,7 +1,8 @@
 import os
-import tempfile
 import shutil
+import tempfile
 import uuid
+
 import pytest


--- a/tests/ensemble/test_averaging.py
+++ b/tests/ensemble/test_averaging.py
@@ -1,15 +1,14 @@
-import numpy as np
 import scipy.stats as stats
 from numpy.testing import assert_array_almost_equal
-from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 from sklearn.linear_model import Ridge, LogisticRegression
-from sklearn.utils.multiclass import type_of_target
-from sklearn.svm import SVC, SVR
 from sklearn.metrics import roc_auc_score, mean_squared_error
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC, SVR
+from sklearn.utils.multiclass import type_of_target

-from nyaggle.testing import make_classification_df, make_regression_df
 from nyaggle.ensemble import averaging, averaging_opt
+from nyaggle.testing import make_classification_df, make_regression_df
 from nyaggle.validation import cross_validate


@@ -40,7 +39,7 @@ def test_averaging():

    result = averaging(test)

-    assert_array_almost_equal((test[0]+test[1]+test[2])/3, result.test_prediction)
+    assert_array_almost_equal((test[0] + test[1] + test[2]) / 3, result.test_prediction)
    assert result.score is None
    assert result.oof_prediction is None

@@ -53,8 +52,8 @@ def test_averaging_with_oof():

    result = averaging(test, oof, y_train)

-    assert_array_almost_equal((test[0]+test[1]+test[2])/3, result.test_prediction)
-    assert_array_almost_equal((oof[0]+oof[1]+oof[2])/3, result.oof_prediction)
+    assert_array_almost_equal((test[0] + test[1] + test[2]) / 3, result.test_prediction)
+    assert_array_almost_equal((oof[0] + oof[1] + oof[2]) / 3, result.oof_prediction)
    assert result.score is None


@@ -66,8 +65,8 @@ def test_averaging_regression():

    result = averaging(test, oof, y_train)

-    assert_array_almost_equal((test[0]+test[1]+test[2])/3, result.test_prediction)
-    assert_array_almost_equal((oof[0]+oof[1]+oof[2])/3, result.oof_prediction)
+    assert_array_almost_equal((test[0] + test[1] + test[2]) / 3, result.test_prediction)
+    assert_array_almost_equal((oof[0] + oof[1] + oof[2]) / 3, result.oof_prediction)
    assert result.score is None


@@ -79,8 +78,8 @@ def test_averaging_multiclass():

    result = averaging(test, oof, y_train)

-    assert_array_almost_equal((test[0]+test[1]+test[2])/3, result.test_prediction)
-    assert_array_almost_equal((oof[0]+oof[1]+oof[2])/3, result.oof_prediction)
+    assert_array_almost_equal((test[0] + test[1] + test[2]) / 3, result.test_prediction)
+    assert_array_almost_equal((oof[0] + oof[1] + oof[2]) / 3, result.oof_prediction)
    assert result.score is None


@@ -103,8 +102,8 @@ def test_weight_averaging():

    result = averaging(test, oof, y_train, weights=[0.2, 0.4, 0.3])

-    assert_array_almost_equal(0.2*test[0]+0.4*test[1]+0.3*test[2], result.test_prediction)
-    assert_array_almost_equal(0.2*oof[0]+0.4*oof[1]+0.3*oof[2], result.oof_prediction)
+    assert_array_almost_equal(0.2 * test[0] + 0.4 * test[1] + 0.3 * test[2], result.test_prediction)
+    assert_array_almost_equal(0.2 * oof[0] + 0.4 * oof[1] + 0.3 * oof[2], result.oof_prediction)
    assert result.score is None


@@ -118,7 +117,7 @@ def test_rank_averaging():

    test_rank = [stats.rankdata(t) / len(X_test) for t in test]

-    assert_array_almost_equal((test_rank[0]+test_rank[1]+test_rank[2])/3, result.test_prediction)
+    assert_array_almost_equal((test_rank[0] + test_rank[1] + test_rank[2]) / 3, result.test_prediction)
    assert result.score is None


@@ -133,8 +132,8 @@ def test_rank_averaging_with_oof():
    oof_rank = [stats.rankdata(o) / len(X_train) for o in oof]
    test_rank = [stats.rankdata(t) / len(X_test) for t in test]

-    assert_array_almost_equal((test_rank[0]+test_rank[1]+test_rank[2])/3, result.test_prediction)
-    assert_array_almost_equal((oof_rank[0]+oof_rank[1]+oof_rank[2])/3, result.oof_prediction)
+    assert_array_almost_equal((test_rank[0] + test_rank[1] + test_rank[2]) / 3, result.test_prediction)
+    assert_array_almost_equal((oof_rank[0] + oof_rank[1] + oof_rank[2]) / 3, result.oof_prediction)
    assert result.score is None


--- a/tests/ensemble/test_stacking.py
+++ b/tests/ensemble/test_stacking.py
@@ -1,15 +1,12 @@
-import numpy as np
-import scipy.stats as stats
-from numpy.testing import assert_array_almost_equal
-from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 from sklearn.linear_model import Ridge, LogisticRegression
-from sklearn.utils.multiclass import type_of_target
+from sklearn.metrics import roc_auc_score
+from sklearn.model_selection import train_test_split
 from sklearn.svm import SVC, SVR
-from sklearn.metrics import roc_auc_score, mean_squared_error
+from sklearn.utils.multiclass import type_of_target

-from nyaggle.testing import make_classification_df, make_regression_df
 from nyaggle.ensemble import stacking
+from nyaggle.testing import make_classification_df
 from nyaggle.validation import cross_validate


@@ -43,5 +40,3 @@ def test_stacking():
    result = stacking(test, oof, y_train, eval_func=roc_auc_score)

    assert roc_auc_score(y_train, result.oof_prediction) > worst_base_roc
-
-
--- a/tests/feature/category_encoder/test_target_encoder.py
+++ b/tests/feature/category_encoder/test_target_encoder.py
@@ -61,7 +61,7 @@ def test_target_encoder_fit_transform():
    })

    X = pd.concat([X_train, X_test])
-    y = pd.concat([y_train, pd.Series([None]*4)]).astype(float)
+    y = pd.concat([y_train, pd.Series([None] * 4)]).astype(float)

    ce1 = TargetEncoder(cols=['x'])
    ce1.fit(X_train, y_train)
--- a/tests/feature/nlp/test_bert.py
+++ b/tests/feature/nlp/test_bert.py
@@ -89,7 +89,7 @@ def test_bert_en_svd_multicol():
    ret = bert.fit_transform(X)

    assert ret.shape[0] == 6
-    assert ret.shape[1] == 2*768 + 1
+    assert ret.shape[1] == 2 * 768 + 1

    ret.drop('id', axis=1, inplace=True)
    npt.assert_almost_equal(ret.iloc[0, :].values, ret.iloc[4, :].values, decimal=3)
@@ -112,5 +112,3 @@ def test_bert_jp():
    ret.drop('id', axis=1, inplace=True)
    npt.assert_almost_equal(ret.iloc[0, :].values, ret.iloc[4, :].values)
    npt.assert_almost_equal(ret.iloc[0, :].values, ret.iloc[5, :].values)
-
-
--- a/tests/feature_store/test_feature_store.py
+++ b/tests/feature_store/test_feature_store.py
@@ -1,12 +1,11 @@
 import os
-import pytest

 import numpy as np
 import pandas as pd
+import pytest
 from pandas.testing import assert_frame_equal

 import nyaggle.feature_store as fs
-
 from nyaggle.testing import get_temp_directory