Updated comments/documentation

master
Taylor Smith 2018-07-20 09:35:30 -05:00
parent 13fa08cf6f
commit e67c3221ac
3 changed files with 30 additions and 20 deletions

View File

@@ -80,24 +80,6 @@ class BaseCriterion(object):
    """


class VarianceReduction(BaseCriterion):
    """Compute the variance reduction after a split.

    Variance reduction is a splitting criterion used by CART trees in the
    context of regression. It examines the variance in a target before and
    after a split to determine whether we've reduced the variability in the
    target.
    """
    def compute_uncertainty(self, y):
        """Compute the variance of a target."""
        return np.var(y)

    def __call__(self, target, mask, uncertainty):
        left, right = target[mask], target[~mask]
        return uncertainty - (self.compute_uncertainty(left) +
                              self.compute_uncertainty(right))


class InformationGain(BaseCriterion):
    """Compute the information gain after a split.
@@ -143,3 +125,21 @@ class InformationGain(BaseCriterion):
        crit = self.crit  # type: callable
        return uncertainty - p * crit(left) - (1 - p) * crit(right)


class VarianceReduction(BaseCriterion):
    """Compute the variance reduction after a split.

    Variance reduction is a splitting criterion used by CART trees in the
    context of regression. It examines the variance in a target before and
    after a split to determine whether we've reduced the variability in the
    target.
    """
    def compute_uncertainty(self, y):
        """Compute the variance of a target."""
        return np.var(y)

    def __call__(self, target, mask, uncertainty):
        left, right = target[mask], target[~mask]
        return uncertainty - (self.compute_uncertainty(left) +
                              self.compute_uncertainty(right))
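
As context for the relocated VarianceReduction class, here is a small usage sketch (not part of the commit; the data below is hypothetical) showing how the criterion scores a candidate split, assuming numpy is imported as np as in the module:

import numpy as np

# Toy regression target and a candidate boolean split mask (hypothetical values).
y = np.array([1.0, 1.2, 0.9, 5.0, 5.3, 5.1])
mask = np.array([True, True, True, False, False, False])

crit = VarianceReduction()
parent_uncertainty = crit.compute_uncertainty(y)   # variance of y before splitting
reduction = crit(y, mask, parent_uncertainty)      # variance removed by the split

# A split that cleanly separates the low values from the high values leaves
# little variance in either child, so the reduction is large.
print(parent_uncertainty, reduction)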

View File

@@ -87,7 +87,7 @@ class NeuralNetClassifier(BaseSimpleEstimator, NeuralMixin):
    optimized neural network code, look into TensorFlow, Keras or other
    libraries.

    This implementation of a neural net uses the ReLu activation function
    This implementation of a neural net uses the TanH activation function
    *only*, and does not allow early convergence. It will continue for
    ``n_iter``. There are many other parameters that would typically be
    tunable in a network, for instance dropout, regularization, learning
@@ -222,11 +222,19 @@ class NeuralNetClassifier(BaseSimpleEstimator, NeuralMixin):
    @staticmethod
    def _back_propagate(truth, probas, layer_results, weights,
                        biases, learning_rate, l2):
        # Compute the gradient (derivative) of our loss function WRT our
        # last layer of weights/biases, and back-propagate the error up
        # the layers, adjusting the weights as we go.
        #
        # Or, expressed in the chain rule:
        #   dL/dW = (dL/dZ)(dZ/dW) ...
        # The probabilities are our first delta. Subtract 1 from the
        # TRUE labels' probabilities in the predictions.
        n_samples = truth.shape[0]

        # Subtract 1 from the true indices; the initial deltas are (y_hat - y).
        # This computes d2 = Y - T
        probas[range(n_samples), truth] -= 1.

        # iterate back through the layers computing the deltas (derivatives)
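
The "subtract 1 at the true indices" step above is the standard gradient of the softmax cross-entropy loss with respect to the pre-softmax scores: (y_hat - y), i.e. the predicted probabilities minus the one-hot truth. A minimal numeric sketch (not from the commit; the values are hypothetical):

import numpy as np

# y_hat: softmax probabilities for two samples over three classes (hypothetical).
probas = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.3, 0.6]])
truth = np.array([0, 2])   # true class index for each sample

# Subtracting 1 at each sample's true class yields the initial deltas (y_hat - y).
deltas = probas.copy()
deltas[range(truth.shape[0]), truth] -= 1.

print(deltas)
# -> [[-0.3  0.2  0.1]
#     [ 0.1  0.3 -0.4]]   (up to floating-point noise)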

View File

@@ -51,7 +51,9 @@ class ItemItemRecommender(BaseSimpleEstimator, RecommenderMixin):
        # save the hyperparameter for later use
        self.k = k

        self.similarity = self._compute_sim(R, k)

    def _compute_sim(self, R, k):
        # compute the similarity between all the items, i.e., the pairwise
        # similarity between each ITEM
        sim = cosine_similarity(R.T)
@@ -66,7 +68,7 @@ class ItemItemRecommender(BaseSimpleEstimator, RecommenderMixin):
                                     not_top_k.shape[1])
        sim[row_indices, not_top_k.ravel()] = 0.

        self.similarity = sim
        return sim

    def recommend_for_user(self, R, user, n=10,
                           filter_previously_seen=False,
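
As a rough illustration of what the new _compute_sim helper returns (a sketch, not part of the commit; the ratings matrix and the way not_top_k is built here are assumptions for demonstration), the item-item similarity is the cosine similarity between the columns of R, with everything outside each item's top-k neighbors zeroed out:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical ratings matrix R: rows are users, columns are items.
R = np.array([[5., 4., 0., 1.],
              [4., 5., 1., 0.],
              [0., 1., 5., 4.],
              [1., 0., 4., 5.]])

# Item-item similarity: transpose so each row of R.T is one item's rating vector.
sim = cosine_similarity(R.T)   # shape (n_items, n_items)

# Keep only each item's top-k most similar items (k=2 here), zeroing the rest.
# This mirrors the masking step shown in the hunk above; the argsort line is
# one plausible way to produce the not_top_k indices it uses.
k = 2
not_top_k = np.argsort(sim, axis=1)[:, :-k]
row_indices = np.repeat(np.arange(sim.shape[0]), not_top_k.shape[1])
sim[row_indices, not_top_k.ravel()] = 0.

print(sim)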