Source code for pymccrgb.classification

""" Utilities for updating classification of point clouds """

from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline

# Fallback hyperparameters for make_sgd_pipeline, used for any option the
# caller does not supply. n_components and gamma configure the RBFSampler;
# alpha, max_iter and n_jobs configure the SGDClassifier.
DEFAULT_PARAMETERS = dict(
    n_components=100,
    gamma=0.01,
    alpha=0.001,
    max_iter=100,
    n_jobs=-1,
)


def make_sgd_pipeline(X_train, y_train, **kwargs):
    """ Returns a fitted sklearn Pipeline for SGD classification with an RBF kernel

    The pipeline has two steps: ``"rbf"``, an ``RBFSampler`` that maps the
    inputs to an approximate RBF kernel feature space, and ``"clf"``, an
    ``SGDClassifier`` trained on those features. Any option not supplied by
    the caller falls back to ``DEFAULT_PARAMETERS``.

    Parameters
    ----------
        X_train: array
            An n x p array of training examples
        y_train: array
            An n x 1 array of training labels. A 2-D array is flattened
            before fitting.
        n_components: int
            The number of RBF components to use
            (Default: 100)
        gamma: float
            The gamma/variance parameter of the RBF kernel
            (Default: 0.01)
        alpha: float
            The penalty parameter of the SGD/SVM classifier
            (Default: 0.001)
        max_iter: int
            The maximum number of iterations to fit the classifier
            (Default: 100)
        n_jobs: int
            The number of jobs to use in fitting the classifier
            (Default: -1, Use all cores)
        Any other keyword argument to sklearn.linear_model.SGDClassifier
            Forwarded unchanged to the classifier constructor
            (e.g. ``loss``, ``penalty``, ``random_state``).

    Returns
    -------
        A trained pipeline composed of an RBF transformer and SGD classifier

    Raises
    ------
        TypeError
            If a keyword argument is neither one of the options above nor
            accepted by the ``SGDClassifier`` constructor.
    """
    if y_train.ndim == 2:
        # Flatten column-vector labels to the 1-D shape the classifier expects.
        y_train = y_train.ravel()

    # Start from the defaults and let caller-supplied values override them.
    params = dict(DEFAULT_PARAMETERS)
    params.update(kwargs)

    # The RBF sampler owns these two options; everything left over
    # (alpha, max_iter, n_jobs and any extra keywords) belongs to the
    # classifier, as the docstring promises.
    n_components = params.pop("n_components")
    gamma = params.pop("gamma")

    estimators = [
        ("rbf", RBFSampler(gamma=gamma, n_components=n_components)),
        ("clf", SGDClassifier(**params)),
    ]
    pipeline = Pipeline(estimators)
    pipeline.fit(X_train, y_train)
    return pipeline