""" Multiscale curvature classification of ground points with color updates """
import numpy as np
from copy import copy
from .classification import make_sgd_pipeline
from .features import calculate_color_features
from .pointutils import equal_sample, intersect_rows
from pymcc_lidar import calculate_excess_height
def classify_ground_mcc(data, scale, tol, downsample=False):
    """ Classifies ground points by a single iteration of the MCC algorithm

    Classifies ground and nonground (or "high") points by comparing the
    elevation of each data point to an interpolated surface. If downsample is
    True, a down-sampled version of the data coordinates will be used when
    interpolating (currently not implemented).

    Based on MCC algorithm implemented in [1]_ and [2]_.

    .. [1] Evans, Jeffrey S.; Hudak, Andrew T. 2007. A multiscale curvature
       algorithm for classifying discrete return LiDAR in forested
       environments. Geoscience and Remote Sensing. 45(4): 1029-1038.
    .. [2] https://sourceforge.net/p/mcclidar

    Parameters
    ----------
    data: array
        A n x 3 (or more) data matrix with rows [x, y, z, ...]
    scale: float
        The interpolation scale. This defines the resolution of the
        interpolated surface, which is calculated by a 3 x 3 windowed
        mean around each interpolation point.
    tol: float
        The height tolerance. Points exceeding the surface by more than
        tol units are classified as nonground
    downsample: bool
        If True, use a downsampled dataset for interpolation.
        Not implemented.

    Returns
    -------
    An n x 1 boolean array of point class labels. True (1) is ground,
    and False (0) is nonground.

    Raises
    ------
    NotImplementedError
        If downsample is True.
    """
    if downsample:
        raise NotImplementedError("Downsampling has not been implemented.")
    # Only the spatial coordinates are passed to the interpolator; extra
    # columns (color, intensity, ...) are ignored here.
    xyz = data[:, 0:3]
    # Copy to guarantee a C-contiguous layout for the pymcc_lidar extension.
    height = calculate_excess_height(xyz.copy(order="C"), scale)
    # A point is ground when it does not exceed the interpolated surface
    # by more than the height tolerance.
    y = height < tol  # 0 = nonground, 1 = ground
    return y
def mcc(
    data,
    scales=None,
    tols=None,
    threshs=None,
    use_las_codes=False,
    verbose=False,
):
    """ Classifies ground points using the MCC algorithm

    Classifies ground and nonground (or "high") points by comparing the
    elevation of each data point to an interpolated surface at user-defined
    scales. The algorithm iterates at each scale until a convergence threshold
    is reached based on the percentage of points classified as ground. Only
    ground points are retained after each iteration.

    Based on MCC algorithm implemented in [1]_ and [2]_.

    .. [1] Evans, Jeffrey S.; Hudak, Andrew T. 2007. A multiscale curvature
       algorithm for classifying discrete return LiDAR in forested
       environments. Geoscience and Remote Sensing. 45(4): 1029-1038.
    .. [2] https://sourceforge.net/p/mcclidar

    Parameters
    ----------
    data: array
        A n x d data matrix with rows [x, y, z, ...]
    scales: list
        The interpolation scales. This defines the resolution of the
        interpolated surface, which is calculated by a 3 x 3 windowed
        mean around each interpolation point. Defaults to [0.5, 1, 1.5]
        meters.
    tols: list
        The height tolerances. Points exceeding the surface by more than
        tol units are classified as nonground. Defaults to 0.3 meters.
    threshs: list
        The convergence thresholds as percentages. Defaults to
        [1%, 0.1%, 0.01%]
    use_las_codes: bool
        If True, return LAS 1.4 classification codes (2 = ground,
        4 = medium vegetation). Default False.
    verbose: bool
        If True, print progress for each iteration. Default False.

    Returns
    -------
    data: array
        An m x d array of ground points
    labels: array
        An n x 1 array of labels (1 is ground, 0 is nonground)
    """
    # Resolve defaults per call rather than using shared mutable default
    # arguments.
    if scales is None:
        scales = [0.5, 1, 1.5]
    if tols is None:
        tols = [0.3, 0.3, 0.3]
    if threshs is None:
        threshs = [1, 0.1, 0.01]
    original_data = copy(data)
    for scale, tol, thresh in zip(scales, tols, threshs):
        converged = False
        niter = 0
        while not converged:
            # The removal percentage is relative to the point count at the
            # start of this iteration (the already-thinned cloud).
            n_points = data.shape[0]
            y = classify_ground_mcc(data, scale, tol)
            ground = y == 1
            n_removed = np.sum(y == 0)
            converged = 100 * (n_removed / n_points) < thresh
            # Only ground points survive to the next iteration.
            data = data[ground, :]
            if verbose:
                print("-" * 20)
                print("MCC iteration")
                print("-" * 20)
                print(
                    "Scale: {:.2f}, Relative height: {:.1e}, iter: {}".format(
                        scale, tol, niter
                    )
                )
                print(
                    "Removed {} nonground points ({:.2f} %)".format(
                        n_removed, 100 * (n_removed / n_points)
                    )
                )
            niter += 1
    # Label every original point by whether it survived as ground.
    labels = intersect_rows(data, original_data)
    if verbose:
        n_ground = data.shape[0]
        n_points = original_data.shape[0]
        print(
            "Retained {} ground points ({:.2f} %)".format(
                n_ground, 100 * (n_ground / n_points)
            )
        )
    if use_las_codes:
        labels[labels == 0] = 4  # Vegetation
        labels[labels == 1] = 2  # Ground
    return data, labels
def mcc_rgb(
    data,
    scales=None,
    tols=None,
    threshs=None,
    training_scales=None,
    training_tols=None,
    n_train=int(1e3),
    max_iter=20,
    n_jobs=1,
    seed=None,
    use_las_codes=False,
    verbose=False,
    **pipeline_kwargs,
):
    """ Classifies ground points using the MCC-RGB algorithm

    Classifies ground and nonground (or "high") points by comparing the
    elevation of each data point to an interpolated surface at user-defined
    scales. The algorithm proceeds as MCC (see the mcc() documentation), except
    that ground points are reclassified based on their color similarity to
    nonground points.

    Parameters
    ----------
    data: array
        A n x d data matrix with rows [x, y, z, r, g, b ...]
    scales: list
        The interpolation scales. This defines the resolution of the
        interpolated surface, which is calculated by a 3 x 3 windowed
        mean around each interpolation point. Defaults to [0.5, 1, 1.5]
        meters. Scale domains are processed in order of increasing scale.
    tols: list
        The height tolerances. Points exceeding the surface by more than
        tol units are classified as nonground. Defaults to 0.3 meters.
    threshs: list
        The convergence thresholds as percentages. Defaults to
        [1%, 0.1%, 0.01%]
    training_scales: list
        The training interpolation scales.
        This defaults to the first scale domain (e.g., 0.5). Both
        training_scales and training_tols must be specified;
        otherwise the defaults are used.
    training_tols: list
        The training relative heights. Defaults to the first
        height tolerance (e.g., 0.3). Can be specified as a list or
        single value
    n_train: int
        The total number of points to use for training the color
        classifier. Defaults to 1E3.
    max_iter: int
        Maximum number of iterations in a scale domain.
        Defaults to 20.
    n_jobs: int
        Number of parallel prediction jobs. Values > 1 or -1 enable
        joblib-based parallel prediction. Defaults to 1 (serial).
    seed: int
        Optional seed value for selecting training data.
    use_las_codes: bool
        If True, return LAS 1.4 classification codes (2 = ground,
        4 = medium vegetation). Default False.
    verbose: bool
        If True, print progress for each iteration. Default False.
    pipeline_kwargs:
        Additional keyword arguments passed to make_sgd_pipeline().

    Returns
    -------
    data: array
        An m x d array of ground points
    labels: array
        An n x 1 array of labels (1 is ground, 0 is nonground)

    Raises
    ------
    ValueError
        If training_scales and training_tols have different lengths.
    """
    # Resolve defaults per call rather than using shared mutable default
    # arguments (this function appends to scales/tols below).
    if scales is None:
        scales = [0.5, 1, 1.5]
    if tols is None:
        tols = [0.3, 0.3, 0.3]
    if threshs is None:
        threshs = [1, 0.1, 0.01]
    if training_scales is None:
        training_scales = scales[0:1]
    if training_tols is None:
        training_tols = tols[0:1]
    # Broadcast a scalar training scale/tolerance against the other list.
    if not isinstance(training_scales, list):
        scale = float(training_scales)
        training_scales = len(training_tols) * [scale]
    if not isinstance(training_tols, list):
        tol = float(training_tols)
        training_tols = len(training_scales) * [tol]
    if len(training_scales) != len(training_tols):
        raise ValueError(
            "Not enough training scales or tolerances provided. Please give "
            "two lists of equal length, or a single value for training_tols."
            "Arguments were training_scales={} and training_tols={}".format(
                training_scales, training_tols
            )
        )
    # Work on copies so the caller's lists are never mutated, and
    # materialize the (scale, tol) pairs: a bare zip iterator would be
    # exhausted by the first membership test, making every later pair
    # look missing.
    scales = list(scales)
    tols = list(tols)
    params = list(zip(scales, tols))
    training_pairs = list(zip(training_scales, training_tols))
    for scale, tol in training_pairs:
        if (scale, tol) not in params:
            scales.append(scale)
            tols.append(tol)
    # Process scale domains in order of increasing scale.
    idx = np.argsort(scales)
    scales = np.array(scales)[idx]
    tols = np.array(tols)[idx]
    # NOTE(review): threshs is not extended when training pairs add extra
    # scale domains, so the zip() below silently drops any surplus domains
    # — confirm this is intended.
    original_data = copy(data)
    # Mask NaN and infinite index/color values
    X = calculate_color_features(data)
    mask = np.isfinite(X).all(axis=-1)
    data = data[mask, :]
    # Percentages below are relative to the post-mask point count.
    n_points = data.shape[0]
    for scale, tol, thresh in zip(scales, tols, threshs):
        converged = False
        niter = 0
        # Reset per scale domain: hitting max_iter in one domain must not
        # skip the remaining domains.
        reached_max_iter = False
        while not converged and not reached_max_iter:
            y = classify_ground_mcc(data, scale, tol)
            if verbose:
                n_removed_mcc = np.sum(y == 0)
                print("-" * 20)
                print("MCC step")
                print("-" * 20)
                print(
                    "Scale: {:.2f}, Relative height: {:.1e}, iter: {}".format(
                        scale, tol, niter
                    )
                )
                print(
                    "Removed {} nonground points ({:.2f} %)".format(
                        n_removed_mcc, 100 * (n_removed_mcc / n_points)
                    )
                )
            # Run the color update only on the first iteration of a domain
            # whose (scale, tol) pair was requested for training. Pair
            # membership is used so unmatched scale/tolerance combinations
            # do not trigger the update.
            update_step = (scale, tol) in training_pairs
            first_iter = niter == 0
            if update_step and first_iter:
                if verbose:
                    print("-" * 20)
                    print("Classification update step")
                    print("-" * 20)
                try:
                    X = calculate_color_features(data)
                    # Balanced sample: half ground, half nonground.
                    X_train, y_train = equal_sample(
                        X, y, size=int(n_train / 2), seed=seed
                    )
                    pipeline = make_sgd_pipeline(X_train, y_train, **pipeline_kwargs)
                    if n_jobs > 1 or n_jobs == -1:
                        if verbose:
                            print(f"Predicting in parallel using {n_jobs}")
                        # NOTE(review): sklearn.externals.joblib was removed
                        # in scikit-learn 0.23; newer versions require
                        # importing joblib directly.
                        from sklearn.externals.joblib import Parallel, delayed
                        pool = Parallel(n_jobs=n_jobs)
                        wrapper = delayed(pipeline.predict)
                        result = pool(wrapper(x.reshape(1, -1)) for x in X[y == 1, :])
                        y_pred_ground = np.array(result).ravel()
                    else:
                        # Only current ground points are candidates for
                        # reclassification.
                        y_pred_ground = pipeline.predict(X[y == 1, :])
                    y_pred = np.zeros_like(y)
                    y_pred[y == 1] = y_pred_ground
                    if verbose:
                        n_removed_clf = np.sum((y == 1) & (y_pred == 0))
                        print(
                            "Scale: {:.2f}, Relative height: {:.1e}".format(scale, tol)
                        )
                        print(
                            "Reclassified {} ground points as nonground ({:.2f} %)".format(
                                n_removed_clf, 100 * (n_removed_clf / n_points)
                            )
                        )
                    # Demote ground points the color classifier rejects.
                    y[(y == 1) & (y_pred == 0)] = 0
                except ValueError as e:
                    # Best-effort: a failed update (e.g. too few samples)
                    # falls back to the plain MCC labels for this domain.
                    print("Skipping classification update. ")
                    print("ValueError: " + str(e))
            ground = y == 1
            data = data[ground, :]
            n_removed = np.sum(y == 0)
            converged = 100 * (n_removed / n_points) < thresh
            reached_max_iter = niter >= max_iter
            if reached_max_iter and verbose:
                print("Reached maximum number of iterations ({})".format(max_iter))
            niter += 1
    # Label every original point by whether it survived as ground.
    labels = intersect_rows(data, original_data)
    if verbose:
        n_ground = data.shape[0]
        n_points = original_data.shape[0]
        print()
        print(
            "Retained {} ground points ({:.2f} %)".format(
                n_ground, 100 * (n_ground / n_points)
            )
        )
    if use_las_codes:
        labels[labels == 0] = 4  # Vegetation
        labels[labels == 1] = 2  # Ground
    return data, labels