"""
Deals with creating a grid spatial index
"""
# This file is part of PyLidar
# Copyright (C) 2015 John Armston, Pete Bunting, Neil Flood, Sam Gillingham
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function, division
import os
import sys
import copy
import numpy
import tempfile
from pylidar import lidarprocessor
from pylidar.lidarformats import spdv4
from pylidar.lidarformats import las
from pylidar.lidarformats import generic
from pylidar.basedriver import Extent
from rios import cuiprogress
from rios import pixelgrid
if sys.version_info[0] > 2:
# hack for Python 3 which uses str instead of basestring
# we just use basestring
basestring = str
"""
Number of blocks to divide the extent up into for the longest axis
if not given a blockSize.
"""
BLOCKSIZE_N_BLOCKS = 2
"""
Types of spatial indices. Copied from spdv4.
"""
INDEX_CARTESIAN = spdv4.SPDV4_INDEX_CARTESIAN
INDEX_SPHERICAL = spdv4.SPDV4_INDEX_SPHERICAL
INDEX_CYLINDRICAL = spdv4.SPDV4_INDEX_CYLINDRICAL
INDEX_POLAR = spdv4.SPDV4_INDEX_POLAR
INDEX_SCAN = spdv4.SPDV4_INDEX_SCAN
"""
Types of pulse indexing methods. Copied from spdv4.
"""
PULSE_INDEX_FIRST_RETURN = spdv4.SPDV4_PULSE_INDEX_FIRST_RETURN
PULSE_INDEX_LAST_RETURN = spdv4.SPDV4_PULSE_INDEX_LAST_RETURN
PULSE_INDEX_START_WAVEFORM = spdv4.SPDV4_PULSE_INDEX_START_WAVEFORM
PULSE_INDEX_END_WAVEFORM = spdv4.SPDV4_PULSE_INDEX_END_WAVEFORM
PULSE_INDEX_ORIGIN = spdv4.SPDV4_PULSE_INDEX_ORIGIN
PULSE_INDEX_MAX_INTENSITY = spdv4.SPDV4_PULSE_INDEX_MAX_INTENSITY
[docs]def createGridSpatialIndex(infile, outfile, binSize=1.0, blockSize=None,
tempDir=None, extent=None, indexType=INDEX_CARTESIAN,
pulseIndexMethod=PULSE_INDEX_FIRST_RETURN, wkt=None):
"""
Creates a grid spatially indexed file from a non spatial input file.
Currently only supports creation of a SPD V4 file.
Creates a tempfile for every block (using blockSize) and them merges them into the output
building a spatial index as it goes.
If blockSize isn't set then it is picked using BLOCKSIZE_N_BLOCKS.
binSize is the size of the bins to create the spatial index.
if tempDir is none a temporary directory will be created with tempfile.mkdtemp
and removed at the end of processing.
extent is an Extent object specifying the extent to work within.
indexType is one of the INDEX_* constants.
pulseIndexMethod is one of the PULSE_INDEX_* constants.
wkt is the projection to use for the output. Copied from the input if
not supplied.
nPulsesPerChunkMerge is the number of pulses to process at a time
when merging.
"""
removeTempDir = False
if tempDir is None:
tempDir = tempfile.mkdtemp()
removeTempDir = True
header, extent, extentList = splitFileIntoTiles(infile, binSize=binSize,
blockSize=blockSize, tempDir=tempDir, extent=extent,
indexType=indexType, pulseIndexMethod=pulseIndexMethod)
# update header
header['INDEX_TLX'] = extent.xMin
header['INDEX_TLY'] = extent.yMax
header['INDEX_TYPE'] = indexType
header['PULSE_INDEX_METHOD'] = pulseIndexMethod
header['BIN_SIZE'] = binSize
if wkt is None:
wkt = header['SPATIAL_REFERENCE']
if len(wkt) == 0:
wkt = getDefaultWKT()
indexAndMerge(extentList, extent, wkt, outfile, header)
# delete the temp files
for fname, extent in extentList:
os.remove(fname)
# we must have created this directory - remove it
if removeTempDir:
os.rmdir(tempDir)
[docs]def splitFileIntoTiles(infiles, binSize=1.0, blockSize=None,
tempDir='.', extent=None, indexType=INDEX_CARTESIAN,
pulseIndexMethod=PULSE_INDEX_FIRST_RETURN,
footprint=lidarprocessor.UNION, outputFormat='SPDV4',
buildPulses=False):
"""
Takes a filename (or list of filenames) and creates a tempfile for every
block (using blockSize).
If blockSize isn't set then it is picked using BLOCKSIZE_N_BLOCKS.
binSize is the size of the bins to create the spatial index.
indexType is one of the INDEX_* constants.
pulseIndexMethod is one of the PULSE_INDEX_* constants.
footprint is one of lidarprocessor.UNION or lidarprocessor.INTERSECTION
and is how to combine extents if there is more than one file.
outputFormat is either 'SPDV4' or 'LAS'. 'LAS' outputs only supported
when input is 'LAS'.
buildPulses relevant for 'LAS' and determines whether to build the
pulse structure or not.
returns the header of the first input file, the extent used and a list
of (fname, extent) tuples that contain the information for
each tempfile.
"""
if isinstance(infiles, basestring):
infiles = [infiles]
# use the first file for header. Not
# clear how to combine headers from multiple inputs
# or if one should.
# leave setting this in case we grab it when working out the extent.
firstHeader = None
if extent is None:
# work out from headers
pixGrid = None
for infile in infiles:
info = generic.getLidarFileInfo(infile)
header = info.header
if firstHeader is None:
firstHeader = header
try:
if indexType == INDEX_CARTESIAN:
xMax = header['X_MAX']
xMin = header['X_MIN']
yMax = header['Y_MAX']
yMin = header['Y_MIN']
elif indexType == INDEX_SPHERICAL:
xMax = header['AZIMUTH_MAX']
xMin = header['AZIMUTH_MIN']
yMax = header['ZENITH_MAX']
yMin = header['ZENITH_MIN']
elif indexType == INDEX_SCAN:
xMax = header['SCANLINE_IDX_MAX']
xMin = header['SCANLINE_IDX_MIN']
yMax = header['SCANLINE_MAX']
yMin = header['SCANLINE_MIN']
else:
msg = 'unsupported indexing method'
raise generic.LiDARSpatialIndexNotAvailable(msg)
except KeyError:
msg = 'info for creating bounding box not available'
raise generic.LiDARFunctionUnsupported(msg)
newPixGrid = pixelgrid.PixelGridDefn(xMin=xMin, xMax=xMax,
yMin=yMin, yMax=yMax, xRes=binSize, yRes=binSize)
if pixGrid is None:
pixGrid = newPixGrid
elif footprint == lidarprocessor.UNION:
pixGrid = pixGrid.union(newPixGrid)
elif footprint == lidarprocessor.INTERSECTION:
pixGrid = pixGrid.intersection(newPixGrid)
else:
msg = 'Unsupported footprint option'
raise generic.LiDARFunctionUnsupported(msg)
# TODO: we treat points as being in the block when they are >=
# the min coords and < the max coords. What happens on the bottom
# and right margins?? We could possibly miss points that are there.
# round the coords to the nearest multiple
xMin = numpy.floor(pixGrid.xMin / binSize) * binSize
yMin = numpy.floor(pixGrid.yMin / binSize) * binSize
xMax = numpy.ceil(pixGrid.xMax / binSize) * binSize
yMax = numpy.ceil(pixGrid.yMax / binSize) * binSize
extent = Extent(xMin, xMax, yMin, yMax, binSize)
else:
# ensure that our binSize comes from their exent
binSize = extent.binSize
# get the first header since we aren't doing the above
info = generic.getLidarFileInfo(infiles[0])
firstHeader = info.header
if blockSize is None:
minAxis = min(extent.xMax - extent.xMin, extent.yMax - extent.yMin)
blockSize = min(minAxis / BLOCKSIZE_N_BLOCKS, 200.0)
# make it a multiple of binSize
blockSize = int(numpy.ceil(blockSize / binSize)) * binSize
else:
# ensure that their given block size can be evenly divided by
# the binSize
# the modulo operator doesn't work too well with floats
# so we take a different approach
a = blockSize / binSize
if a != int(a):
msg = 'blockSize must be evenly divisible be the binSize'
raise generic.LiDARInvalidData(msg)
extentList = []
subExtent = Extent(extent.xMin, extent.xMin + blockSize,
extent.yMax - blockSize, extent.yMax, binSize)
controls = lidarprocessor.Controls()
controls.setSpatialProcessing(False)
tmpSuffix = '.' + outputFormat.lower()
bMoreToDo = True
while bMoreToDo:
fd, fname = tempfile.mkstemp(suffix=tmpSuffix, dir=tempDir)
os.close(fd)
userClass = lidarprocessor.LidarFile(fname, generic.CREATE)
if outputFormat == 'SPDV4':
userClass.setLiDARDriverOption('SCALING_BUT_NO_DATA_WARNING', False)
driver = spdv4.SPDV4File(fname, generic.CREATE, controls, userClass)
elif outputFormat == 'LAS':
driver = las.LasFile(fname, generic.CREATE, controls, userClass)
else:
msg = 'Unsupported output format %s' % outputFormat
raise generic.LiDARFunctionUnsupported(msg)
data = (copy.copy(subExtent), driver)
extentList.append(data)
# move it along
subExtent.xMin += blockSize
subExtent.xMax += blockSize
if subExtent.xMin >= extent.xMax:
# next line down
subExtent.xMin = extent.xMin
subExtent.xMax = extent.xMin + blockSize
subExtent.yMax -= blockSize
subExtent.yMin -= blockSize
# done?
bMoreToDo = subExtent.yMax > extent.yMin
# ok now set up to read the input files using lidarprocessor
dataFiles = lidarprocessor.DataFiles()
dataFiles.inputs = []
for infile in infiles:
input = lidarprocessor.LidarFile(infile, lidarprocessor.READ)
# must be a better way of doing this, but this is what
# translate does. We don't know what formats we are getting ahead of time
info = generic.getLidarFileInfo(infile)
inFormat = info.getDriverName()
if inFormat == 'LAS':
input.setLiDARDriverOption('BUILD_PULSES', buildPulses)
dataFiles.inputs.append(input)
controls = lidarprocessor.Controls()
progress = cuiprogress.GDALProgressBar()
progress.setLabelText('Splitting...')
controls.setProgress(progress)
controls.setSpatialProcessing(False)
controls.setMessageHandler(lidarprocessor.silentMessageFn)
otherArgs = lidarprocessor.OtherArgs()
otherArgs.outList = extentList
otherArgs.indexType = indexType
otherArgs.pulseIndexMethod = pulseIndexMethod
lidarprocessor.doProcessing(classifyFunc, dataFiles, controls=controls,
otherArgs=otherArgs)
# close all the output files and save their names to return
newExtentList = []
for subExtent, driver in extentList:
fname = driver.fname
driver.close()
data = (fname, subExtent)
newExtentList.append(data)
return firstHeader, extent, newExtentList
[docs]def getDefaultWKT():
"""
When processing data in sensor or project coordinates we may not have a WKT.
However, rios.pixelgrid requires something. For now
return the WKT for GDA96/MGA zone 55 until we think of something
better.
"""
from osgeo import osr
sr = osr.SpatialReference()
sr.ImportFromEPSG(28355)
return sr.ExportToWkt()
[docs]def copyScaling(input, output):
"""
Copy the known scaling required fields accross.
Internal method. Called from classifyFunc.
"""
for arrayType in (lidarprocessor.ARRAY_TYPE_PULSES,
lidarprocessor.ARRAY_TYPE_POINTS, lidarprocessor.ARRAY_TYPE_WAVEFORMS):
try:
fields = output.getScalingColumns(arrayType)
except generic.LiDARInvalidSetting:
continue
for field in fields:
try:
gain, offset = input.getScaling(field, arrayType)
output.setScaling(field, arrayType, gain, offset)
except generic.LiDARFileException:
pass
[docs]def setScalingForCoordField(driver, srcfield, coordfield):
"""
Internal method to set the output scaling for range of data.
"""
# srcfield and coordfield might not be the same type (pulses/points)
# this happens when we are using point X and Y to set pulse X_IDX, Y_IDX etc
if srcfield in spdv4.PULSE_SCALED_FIELDS:
gain, offset = driver.getScaling(srcfield, lidarprocessor.ARRAY_TYPE_PULSES)
elif srcfield in spdv4.POINT_SCALED_FIELDS:
gain, offset = driver.getScaling(srcfield, lidarprocessor.ARRAY_TYPE_POINTS)
else:
# we need to set these in case an unscaled variable is used for the spatial index
# e.g. using pulse SCANLINE and SCANLINE_IDX to set pulse X_IDX, Y_IDX
gain = 1.0
offset = 0.0
if coordfield in spdv4.PULSE_SCALED_FIELDS:
driver.setScaling(coordfield, lidarprocessor.ARRAY_TYPE_PULSES, gain, offset)
elif coordfield in spdv4.POINT_SCALED_FIELDS:
driver.setScaling(coordfield, lidarprocessor.ARRAY_TYPE_POINTS, gain, offset)
[docs]def classifyFunc(data, otherArgs):
"""
Called by lidarprocessor. Looks at the input data and splits into
the appropriate output files.
"""
for input in data.inputs:
pulses = input.getPulses()
points = input.getPointsByPulse()
waveformInfo = input.getWaveformInfo()
recv = input.getReceived()
trans = input.getTransmitted()
# With LAS sometimes this happens. Not sure why...
if len(pulses) == 0:
continue
for extent, driver in otherArgs.outList:
if data.info.isFirstBlock():
# deal with scaling. There must be a better way to do this.
copyScaling(input, driver)
# TODO: should we always be able to rely on X_IDX, Y_IDX for
# whatever index we are building?
# No - as the values of these columns may have to change if the
# properties of the spatial indexing method change
if otherArgs.indexType == INDEX_CARTESIAN:
xIdxFieldName = 'X'
yIdxFieldName = 'Y'
xIdx, yIdx = indexPulses(pulses, points, otherArgs.pulseIndexMethod)
elif otherArgs.indexType == INDEX_SPHERICAL:
xIdxFieldName = 'AZIMUTH'
yIdxFieldName = 'ZENITH'
xIdx, yIdx = pulses[xIdxFieldName], pulses[yIdxFieldName]
elif otherArgs.indexType == INDEX_SCAN:
xIdxFieldName = 'SCANLINE_IDX'
yIdxFieldName = 'SCANLINE'
xIdx, yIdx = pulses[xIdxFieldName], pulses[yIdxFieldName]
else:
msg = 'unsupported indexing method'
raise generic.LiDARSpatialIndexNotAvailable(msg)
# ensure the scaling of X_IDX & Y_IDX matches the data we are putting in it
if driver.getDriverName() == 'SPDV4':
# only makes sense for SPDV4 since LAS doesn't really have an X_IDX etc
setScalingForCoordField(driver, xIdxFieldName, 'X_IDX')
setScalingForCoordField(driver, yIdxFieldName, 'Y_IDX')
# this the expression used in the spatial index building
# so we are consistent.
mask = ((xIdx >= extent.xMin) & (xIdx < extent.xMax) &
(yIdx > extent.yMin) & (yIdx <= extent.yMax))
# subset the data
pulsesSub = pulses[mask]
# this is required otherwise the pulses get stripped out
# when we write the pulses in spatial mode (in indexAndMerge)
pulsesSub['X_IDX'] = xIdx[mask]
pulsesSub['Y_IDX'] = yIdx[mask]
# subset the other data also to match
pointsSub = points[..., mask]
waveformInfoSub = None
recvSub = None
transSub = None
if waveformInfo is not None and waveformInfo.size > 0:
waveformInfoSub = waveformInfo[...,mask]
if recv is not None and recv.size > 0:
recvSub = recv[:,:,mask]
if trans is not None and trans.size > 0:
transSub = trans[:,:,mask]
driver.writeData(pulsesSub, pointsSub, transSub, recvSub,
waveformInfoSub)
[docs]def indexPulses(pulses, points, pulseIndexMethod):
"""
Internal method to assign a point coordinates to the X_IDX and Y_IDX
columns based on the user specified pulse_index_method.
"""
if pulseIndexMethod == PULSE_INDEX_FIRST_RETURN:
if points.shape[0] > 0:
xIdx = points['X'][0, ...]
yIdx = points['Y'][0, ...]
else:
xIdx = numpy.zeros(points.shape[1],dtype=points['X'].dtype)
yIdx = numpy.zeros(points.shape[1],dtype=points['Y'].dtype)
elif pulseIndexMethod == PULSE_INDEX_LAST_RETURN:
if points.shape[0] > 0:
firstfield = points.dtype.names[0]
last = points[firstfield].count(axis=0) - 1
idx = numpy.arange(last.size)
xIdx = points['X'][last, idx]
yIdx = points['Y'][last, idx]
else:
xIdx = numpy.zeros(points.shape[1],dtype=points['X'].dtype)
yIdx = numpy.zeros(points.shape[1],dtype=points['Y'].dtype)
else:
msg = 'unsupported pulse indexing method'
raise generic.LiDARPulseIndexUnsupported(msg)
return xIdx, yIdx
[docs]def indexAndMerge(extentList, extent, wkt, outfile, header):
"""
Internal method to merge all the temporary files into the output
spatially indexing as we go.
"""
controls = lidarprocessor.Controls()
controls.setSpatialProcessing(False)
# open in read mode
driverExtentList = []
for fname, subExtent in extentList:
userClass = lidarprocessor.LidarFile(fname, generic.READ)
driver = spdv4.SPDV4File(fname, generic.READ, controls, userClass)
data = (subExtent, driver)
driverExtentList.append(data)
# create output file
userClass = lidarprocessor.LidarFile(outfile, generic.CREATE)
userClass.setLiDARDriverOption('SCALING_BUT_NO_DATA_WARNING', False)
controls = lidarprocessor.Controls()
controls.setSpatialProcessing(True)
outDriver = spdv4.SPDV4File(outfile, generic.CREATE, controls, userClass)
pixGrid = pixelgrid.PixelGridDefn(xMin=extent.xMin, xMax=extent.xMax,
yMin=extent.yMin, yMax=extent.yMax, projection=wkt,
xRes=extent.binSize, yRes=extent.binSize)
outDriver.setPixelGrid(pixGrid)
# update header
nrows,ncols = pixGrid.getDimensions()
header['NUMBER_BINS_X'] = ncols
header['NUMBER_BINS_Y'] = nrows
# clobber these values since we don't want to
# start with the number in the original file
# they will be reset to 0 in the new file
del header['NUMBER_OF_POINTS']
del header['NUMBER_OF_PULSES']
# these too
del header['GENERATING_SOFTWARE']
del header['CREATION_DATETIME']
progress = cuiprogress.GDALProgressBar()
progress.setLabelText('Merging...')
progress.setTotalSteps(len(extentList))
progress.setProgress(0)
nFilesProcessed = 0
nFilesWritten = 0
for subExtent, driver in driverExtentList:
# read in all the data
# NOTE: can't write data in blocks as the driver needs to be able to
# sort all the data in one go.
bDataWritten = False
npulses = driver.getTotalNumberPulses()
if npulses > 0:
pulseRange = generic.PulseRange(0, npulses)
driver.setPulseRange(pulseRange)
pulses = driver.readPulsesForRange()
points = driver.readPointsByPulse()
waveformInfo = driver.readWaveformInfo()
recv = driver.readReceived()
trans = driver.readTransmitted()
outDriver.setExtent(subExtent)
if nFilesWritten == 0:
copyScaling(driver, outDriver)
outDriver.setHeader(header)
# on create, a spatial index is created
outDriver.writeData(pulses, points, trans, recv,
waveformInfo)
nFilesWritten += 1
# close the driver while we are here
driver.close()
if bDataWritten:
nFilesWritten += 1
nFilesProcessed += 1
progress.setProgress(nFilesProcessed)
outDriver.close()