# Source code for pymccrgb.core

""" Multiscale curvature classification of ground points with color updates """

import numpy as np

from copy import copy

from .classification import make_sgd_pipeline
from .features import calculate_color_features
from .pointutils import equal_sample, intersect_rows

from pymcc_lidar import calculate_excess_height


def classify_ground_mcc(data, scale, tol, downsample=False):
    """ Classifies ground points by a single iteration of the MCC algorithm

    Classifies ground and nonground (or "high") points by comparing the
    elevation of each data point to an interpolated surface. If downsample
    is True, a down-sampled version of the data coordinates will be used
    when interpolating (currently not implemented).

    Based on MCC algorithm implemented in [1]_ and [2]_.

    .. [1] Evans, Jeffrey S.; Hudak, Andrew T. 2007. A multiscale curvature
       algorithm for classifying discrete return LiDAR in forested
       environments. Geoscience and Remote Sensing. 45(4): 1029-1038.

    .. [2] https://sourceforge.net/p/mcclidar

    Parameters
    ----------
        data: array
            A n x 3 (or more) data matrix with rows [x, y, z, ...]
        scale: float
            The interpolation scale. This defines the resolution of the
            interpolated surface, which is calculated by a 3 x 3 windowed
            mean around each interpolation point.
        tol: float
            The height tolerance. Points exceeding the surface by more
            than tol units are classified as nonground
        downsample: bool
            If True, use a downsampled dataset for interpolation.
            Not implemented.

    Returns
    -------
        An n x 1 array of point class labels. By default, 1 is ground,
        and 0 is nonground.

    Raises
    ------
        NotImplementedError
            If downsample is True.
    """
    if downsample:
        raise NotImplementedError("Downsampling has not been implemented.")

    # Only x, y, z are used for the surface interpolation; extra columns
    # (color, intensity, ...) are ignored here.
    xyz = data[:, 0:3]
    # The pymcc extension expects a C-contiguous array; the copy guards
    # against Fortran-ordered or sliced (non-contiguous) input.
    height = calculate_excess_height(xyz.copy(order="C"), scale)
    y = height < tol  # 0 = nonground, 1 = ground
    return y
def mcc(
    data,
    scales=None,
    tols=None,
    threshs=None,
    use_las_codes=False,
    verbose=False,
):
    """ Classifies ground points using the MCC algorithm

    Classifies ground and nonground (or "high") points by comparing the
    elevation of each data point to an interpolated surface at user-defined
    scales. The algorithm iterates at each scale until a convergence
    threshold is reached based on the percentage of points classified as
    ground. Only ground points are retained after each iteration.

    Based on MCC algorithm implemented in [1]_ and [2]_.

    .. [1] Evans, Jeffrey S.; Hudak, Andrew T. 2007. A multiscale curvature
       algorithm for classifying discrete return LiDAR in forested
       environments. Geoscience and Remote Sensing. 45(4): 1029-1038.

    .. [2] https://sourceforge.net/p/mcclidar

    Parameters
    ----------
        data: array
            A n x d data matrix with rows [x, y, z, ...]
        scales: list
            The interpolation scales. This defines the resolution of the
            interpolated surface, which is calculated by a 3 x 3 windowed
            mean around each interpolation point. Defaults to
            [0.5, 1, 1.5] meters.
        tols: list
            The height tolerances. Points exceeding the surface by more
            than tol units are classified as nonground. Defaults to
            0.3 meters.
        threshs: list
            The convergence thresholds as percentages. Defaults to
            [1%, 0.1%, 0.01%]
        use_las_codes: bool
            If True, return LAS 1.4 classification codes (2 = ground,
            4 = medium vegetation). Default False.
        verbose: bool
            If True, print progress information for each iteration.
            Default False.

    Returns
    -------
        data: array
            An m x d array of ground points
        labels: array
            An n x 1 array of labels (1 is ground, 0 is nonground)
    """
    # Use None sentinels instead of mutable default lists so defaults are
    # not shared between calls.
    if scales is None:
        scales = [0.5, 1, 1.5]
    if tols is None:
        tols = [0.3, 0.3, 0.3]
    if threshs is None:
        threshs = [1, 0.1, 0.01]

    original_data = copy(data)
    for scale, tol, thresh in zip(scales, tols, threshs):
        converged = False
        niter = 0
        while not converged:
            n_points = data.shape[0]
            if n_points == 0:
                # All points were removed; avoid division by zero below.
                break
            y = classify_ground_mcc(data, scale, tol)
            ground = y == 1
            n_removed = np.sum(y == 0)
            # Converged when the fraction removed this pass drops below
            # the threshold percentage for this scale domain.
            converged = 100 * (n_removed / n_points) < thresh
            data = data[ground, :]
            if verbose:
                print("-" * 20)
                print("MCC iteration")
                print("-" * 20)
                print(
                    "Scale: {:.2f}, Relative height: {:.1e}, iter: {}".format(
                        scale, tol, niter
                    )
                )
                print(
                    "Removed {} nonground points ({:.2f} %)".format(
                        n_removed, 100 * (n_removed / n_points)
                    )
                )
            niter += 1

    # Label each original point by whether it survived as a ground point.
    labels = intersect_rows(data, original_data)

    if verbose:
        n_ground = data.shape[0]
        n_points = original_data.shape[0]
        print(
            "Retained {} ground points ({:.2f} %)".format(
                n_ground, 100 * (n_ground / n_points)
            )
        )

    if use_las_codes:
        labels[labels == 0] = 4  # Vegetation
        labels[labels == 1] = 2  # Ground

    return data, labels
def mcc_rgb(
    data,
    scales=None,
    tols=None,
    threshs=None,
    training_scales=None,
    training_tols=None,
    n_train=int(1e3),
    max_iter=20,
    n_jobs=1,
    seed=None,
    use_las_codes=False,
    verbose=False,
    **pipeline_kwargs,
):
    """ Classifies ground points using the MCC-RGB algorithm

    Classifies ground and nonground (or "high") points by comparing the
    elevation of each data point to an interpolated surface at user-defined
    scales. The algorithm proceeds as MCC (see the mcc() documentation),
    except that ground points are reclassified based on their color
    similarity to nonground points.

    Parameters
    ----------
        data: array
            A n x d data matrix with rows [x, y, z, r, g, b ...]
        scales: list
            The interpolation scales. This defines the resolution of the
            interpolated surface, which is calculated by a 3 x 3 windowed
            mean around each interpolation point. Defaults to
            [0.5, 1, 1.5] meters. Scale domains are processed in order of
            increasing scale.
        tols: list
            The height tolerances. Points exceeding the surface by more
            than tol units are classified as nonground. Defaults to
            0.3 meters.
        threshs: list
            The convergence thresholds as percentages. Defaults to
            [1%, 0.1%, 0.01%]
        training_scales: list
            The training interpolation scales. This defaults to the first
            scale domain (e.g., 0.5). Both training_scales and
            training_tols must be specified; otherwise the defaults are
            used.
        training_tols: list
            The training relative heights. Defaults to the first height
            tolerance (e.g., 0.3). Can be specified as a list or single
            value
        n_train: int
            The total number of points to use for training the color
            classifier. Defaults to 1000.
        max_iter: int
            Maximum number of iterations in a scale domain. Defaults to 20.
        n_jobs: int
            Number of parallel jobs for classifier prediction. Use -1 for
            all processors. Defaults to 1 (serial).
        seed: int
            Optional seed value for selecting training data.
        use_las_codes: bool
            If True, return LAS 1.4 classification codes (2 = ground,
            4 = medium vegetation). Default False.
        verbose: bool
            If True, print progress information. Default False.
        **pipeline_kwargs:
            Extra keyword arguments passed to make_sgd_pipeline.

    Returns
    -------
        data: array
            An m x d array of ground points
        labels: array
            An n x 1 array of labels (1 is ground, 0 is nonground)
        updated: array
            An n x 1 array of labels indicating whether the point was
            updated in an MCC-RGB step. -1 indicates the point's
            classification was not updated. If there are multiple training
            scales, this will be the index of the scale and tolerance
            range defined in training_scales and training_tols.

    Raises
    ------
        ValueError
            If training_scales and training_tols have different lengths.
    """
    # None sentinels instead of mutable default lists: this function
    # appends to scales/tols below, which previously corrupted the shared
    # default objects across calls (and mutated caller-supplied lists).
    if scales is None:
        scales = [0.5, 1, 1.5]
    if tols is None:
        tols = [0.3, 0.3, 0.3]
    if threshs is None:
        threshs = [1, 0.1, 0.01]

    if training_scales is None:
        training_scales = scales[0:1]

    if training_tols is None:
        training_tols = tols[0:1]

    # Copy so the append operations below never mutate the caller's lists.
    scales = list(scales)
    tols = list(tols)

    if not isinstance(training_scales, list):
        scale = float(training_scales)
        training_scales = len(training_tols) * [scale]

    if not isinstance(training_tols, list):
        tol = float(training_tols)
        training_tols = len(training_scales) * [tol]

    if len(training_scales) != len(training_tols):
        raise ValueError(
            "Not enough training scales or tolerances provided. Please give "
            "two lists of equal length, or a single value for training_tols."
            "Arguments were training_scales={} and training_tols={}".format(
                training_scales, training_tols
            )
        )

    # Materialize the pairs: a bare zip() iterator is exhausted by the
    # first membership test, so every later (scale, tol) would be treated
    # as missing and appended as a duplicate.
    params = list(zip(scales, tols))
    for scale, tol in zip(training_scales, training_tols):
        if (scale, tol) not in params:
            scales.append(scale)
            tols.append(tol)

    # Process scale domains in order of increasing scale, keeping each
    # tolerance paired with its scale.
    # NOTE(review): if training scales extend `scales` beyond len(threshs),
    # zip() below silently drops the largest scales — confirm intended.
    idx = np.argsort(scales)
    scales = np.array(scales)
    tols = np.array(tols)
    scales = scales[idx]
    tols = tols[idx]

    original_data = copy(data)

    # Mask NaN and infinite index/color values
    X = calculate_color_features(data)
    mask = np.isfinite(X).all(axis=-1)
    data = data[mask, :]
    n_points = data.shape[0]
    # NOTE(review): unlike mcc(), n_points is fixed at the initial count,
    # so convergence percentages are relative to the starting point count,
    # not the current one — confirm this asymmetry is intended.

    # updated = np.full((n_points,), fill_value=-1)

    for scale, tol, thresh in zip(scales, tols, threshs):
        converged = False
        # Reset per scale domain: max_iter caps iterations *within* a
        # domain; previously a stale True skipped all remaining domains.
        reached_max_iter = False
        niter = 0
        while not converged and not reached_max_iter:
            if data.shape[0] == 0:
                # All points removed; nothing left to classify.
                break
            y = classify_ground_mcc(data, scale, tol)

            if verbose:
                n_removed_mcc = np.sum(y == 0)
                print("-" * 20)
                print("MCC step")
                print("-" * 20)
                print(
                    "Scale: {:.2f}, Relative height: {:.1e}, iter: {}".format(
                        scale, tol, niter
                    )
                )
                print(
                    "Removed {} nonground points ({:.2f} %)".format(
                        n_removed_mcc, 100 * (n_removed_mcc / n_points)
                    )
                )

            # The color-based reclassification runs once per training
            # scale domain, on its first iteration.
            update_step = scale in training_scales and tol in training_tols
            first_iter = niter == 0
            if update_step and first_iter:
                if verbose:
                    print("-" * 20)
                    print("Classification update step")
                    print("-" * 20)
                try:
                    X = calculate_color_features(data)
                    # Balanced sample: half ground, half nonground.
                    X_train, y_train = equal_sample(
                        X, y, size=int(n_train / 2), seed=seed
                    )
                    pipeline = make_sgd_pipeline(X_train, y_train, **pipeline_kwargs)

                    if n_jobs > 1 or n_jobs == -1:
                        if verbose:
                            print(f"Predicting in parallel using {n_jobs}")
                        # NOTE(review): sklearn.externals.joblib was removed
                        # in scikit-learn >= 0.23; migrate to `import joblib`.
                        from sklearn.externals.joblib import Parallel, delayed

                        pool = Parallel(n_jobs=n_jobs)
                        wrapper = delayed(pipeline.predict)
                        result = pool(wrapper(x.reshape(1, -1)) for x in X[y == 1, :])
                        y_pred_ground = np.array(result).ravel()
                    else:
                        # Only current ground points are candidates for
                        # reclassification as nonground.
                        y_pred_ground = pipeline.predict(X[y == 1, :])

                    y_pred = np.zeros_like(y)
                    y_pred[y == 1] = y_pred_ground

                    # params = list(zip(training_scales, training_tols))
                    # update_step_idx = params.index((scale, tol))
                    # updated[(y == 1) & (y_pred == 0)] = update_step_idx

                    if verbose:
                        n_removed_clf = np.sum((y == 1) & (y_pred == 0))
                        print(
                            "Scale: {:.2f}, Relative height: {:.1e}".format(scale, tol)
                        )
                        print(
                            "Reclassified {} ground points as nonground ({:.2f} %)".format(
                                n_removed_clf, 100 * (n_removed_clf / n_points)
                            )
                        )

                    # Demote MCC-ground points the classifier calls nonground.
                    y[(y == 1) & (y_pred == 0)] = 0
                except ValueError as e:
                    # Best-effort: e.g. too few points to draw a balanced
                    # training sample. Fall back to plain MCC labels.
                    print("Skipping classification update. ")
                    print("ValueError: " + str(e))

            ground = y == 1
            data = data[ground, :]

            n_removed = np.sum(y == 0)
            converged = 100 * (n_removed / n_points) < thresh
            reached_max_iter = niter >= max_iter
            if reached_max_iter and verbose:
                print("Reached maximum number of iterations ({})".format(max_iter))
            niter += 1

    labels = intersect_rows(data, original_data)

    if verbose:
        n_ground = data.shape[0]
        n_points = original_data.shape[0]
        print()
        print(
            "Retained {} ground points ({:.2f} %)".format(
                n_ground, 100 * (n_ground / n_points)
            )
        )

    if use_las_codes:
        labels[labels == 0] = 4  # Vegetation
        labels[labels == 1] = 2  # Ground

    return data, labels  # , updated