Source code for pymccrgb.pointutils

""" Utilties for common point cloud operations """

import numpy as np

from scipy.spatial import cKDTree


[docs]def intersect_rows(arr1, arr2): """ Returns a binary mask of the rows in arr1 that are in arr2 """ mask = np.zeros((arr2.shape[0],), dtype=bool) dict1 = {tuple(row): i for i, row in enumerate(arr1)} for i, row in enumerate(arr2): if tuple(row) in dict1: mask[i] = True return mask
[docs]def point_grid(x_min, x_max, y_min, y_max, dx, dy=None): """ Generates a grid of points within a bounding box """ if dy is None: dy = dx x = np.arange(x_min, x_max + dx, dx) y = np.arange(y_min, y_max + dy, dy) X, Y = np.meshgrid(x, y) points = np.vstack([X.ravel(), Y.ravel()]).T return points
[docs]def sample_point_cloud(source, target, sample_indices=[2]): """ Resamples a source point cloud at the coordinates of a target points Uses the nearest point in the target point cloud to the source point Parameters ---------- source: array Input point cloud target: array Target point cloud for sample locations sample_indices: list List of indices to sample from source. Defaults to 2 (z or height dimension) Returns ------- An array of sampled points """ sample_indices = np.array(sample_indices) tree = cKDTree(source[:, 0:2]) dist, idx = tree.query(target, n_jobs=-1) output = np.hstack( [ target, source[idx[:, None], sample_indices].reshape( (len(idx), len(sample_indices)) ), ] ) return output
[docs]def equal_sample(X, y, size=100, seed=None): """ Takes a sample of equal number of feature vectors from each input class Assumes y contains discrete labels 0, ..., ymax Raises ------ A ValueError if there are insufficient data in a particular class """ if seed is not None: np.random.seed(seed) Xs = [] ys = [] for val in range(y.max() + 1): subset = y == val if np.sum(subset) < size: raise ValueError( "Not enough data in class {}: sample size is {}, but only {} data are available".format( val, size, np.sum(subset) ) ) sample = np.random.choice(np.sum(subset), size=size) Xs.append(X[subset][sample, :]) ys.append(np.full((size, 1), fill_value=val)) X_sampled = np.vstack(Xs) y_sampled = np.vstack(ys) return X_sampled, y_sampled
[docs]def stratified_sample(X, y, size=100, seed=None): """ Takes a stratified sample of feature vectors from each input class Assumes y contains discrete labels 0, ..., ymax Raises ------ A ValueError if there are insufficient data in a particular class """ if seed is not None: np.random.seed(seed) Xs = [] ys = [] for val in range(y.max() + 1): subset = y == val frac = np.sum(subset) / len(y) target_size = int(frac * size) if np.sum(subset) < target_size: raise ValueError( "Not enough data in class {}: sample size is {}, but {} data are available".format( val, size, np.sum(subset) ) ) sample = np.random.choice(np.sum(subset), size=target_size) Xs.append(X[subset][sample, :]) ys.append(np.full((target_size, 1), fill_value=val)) X_sampled = np.vstack(Xs) y_sampled = np.vstack(ys) return X_sampled, y_sampled