# pylidar.lidarformats.spdv4


"""
SPD V4 format driver and support functions

Write Driver Options
--------------------

These are contained in the WRITESUPPORTEDOPTIONS module level variable.

+-----------------------------+-------------------------------------------+
| Name                        | Use                                       |
+=============================+===========================================+
| SCALING_BUT_NO_DATA_WARNING | Warn when scaling set for a column that   |
|                             | doesn't get created. Defaults to True     |
+-----------------------------+-------------------------------------------+
| HDF5_CHUNK_SIZE             | Set the HDF5 chunk size when creating     |
|                             | columns. Defaults to 250.                 |
+-----------------------------+-------------------------------------------+

"""
# This file is part of PyLidar
# Copyright (C) 2015 John Armston, Pete Bunting, Neil Flood, Sam Gillingham
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function, division

import sys
import copy
import numpy
import h5py
from numba import jit
from rios import pixelgrid
from . import generic
from . import gridindexutils
from . import h5space
from . import spdv4_index

WRITESUPPORTEDOPTIONS = ('SCALING_BUT_NO_DATA_WARNING', 
            'HDF5_CHUNK_SIZE')
"driver options"
READSUPPORTEDOPTIONS = ()
"driver options"

"Default hdf5 chunk size set on column creation"
DEFAULT_HDF5_CHUNK_SIZE = 250

HEADER_FIELDS = {'AZIMUTH_MAX' : numpy.float64, 'AZIMUTH_MIN' : numpy.float64,
'BANDWIDTHS' : numpy.float32, 'BIN_SIZE' : numpy.float32,
'BLOCK_SIZE_POINT' : numpy.uint16, 'BLOCK_SIZE_PULSE' : numpy.uint16,
'BLOCK_SIZE_WAVEFORM' : numpy.uint16,
'BLOCK_SIZE_RECEIVED' : numpy.uint16, 'BLOCK_SIZE_TRANSMITTED' : numpy.uint16,
'CAPTURE_DATETIME' : bytes, 'CREATION_DATETIME' : bytes,
'FIELD_OF_VIEW' : numpy.float32,
'GENERATING_SOFTWARE' : bytes, 'INDEX_TYPE' : numpy.uint16,
'INDEX_TLX' : numpy.float64, 'INDEX_TLY' : numpy.float64,
'NUMBER_BINS_X' : numpy.uint32, 'NUMBER_BINS_Y' : numpy.uint32,
'NUMBER_OF_POINTS' : numpy.uint64, 'NUMBER_OF_PULSES' : numpy.uint64,
'NUMBER_OF_WAVEFORMS' : numpy.uint64,
'NUM_OF_WAVELENGTHS' : numpy.uint16, 'POINT_DENSITY' : numpy.float32,
'PULSE_ALONG_TRACK_SPACING' : numpy.float32, 
'PULSE_ANGULAR_SPACING_SCANLINE' : numpy.float32,
'PULSE_ANGULAR_SPACING_SCANLINE_IDX' : numpy.float32,
'PULSE_ACROSS_TRACK_SPACING' : numpy.float32, 'PULSE_DENSITY' : numpy.float32,
'PULSE_ENERGY' : numpy.float32, 'PULSE_FOOTPRINT' : numpy.float32,
'PULSE_INDEX_METHOD' : numpy.uint16, 'RANGE_MAX' : numpy.float32,
'RANGE_MIN' : numpy.float32, 
'SCANLINE_IDX_MAX' : numpy.uint32, 'SCANLINE_IDX_MIN' : numpy.uint32,
'SCANLINE_MAX' : numpy.uint16, 'SCANLINE_MIN' : numpy.uint16,
'SENSOR_APERTURE_SIZE' : numpy.float32, 'SENSOR_BEAM_DIVERGENCE' : numpy.float32,
'SENSOR_HEIGHT' : numpy.float32, 'SENSOR_MAX_SCAN_ANGLE' : numpy.float32,
'SENSOR_PULSE_REPETITION_FREQ' : numpy.float32,
'SENSOR_SCAN_RATE' : numpy.float32, 'SENSOR_SPEED' : numpy.float32,
'SENSOR_TEMPORAL_BIN_SPACING' : numpy.float64, 
'SENSOR_BEAM_EXIT_DIAMETER' : numpy.float32,
'SPATIAL_REFERENCE' : bytes,
'SYSTEM_IDENTIFIER' : bytes, 'USER_META_DATA' : bytes,
'VERSION_SPD' : numpy.uint8, 'VERSION_DATA' : numpy.uint8, 
'WAVEFORM_BIT_RES' : numpy.uint16, 'WAVELENGTHS' : numpy.float32,
'X_MAX' : numpy.float64, 'X_MIN' : numpy.float64, 'Y_MAX' : numpy.float64,
'Y_MIN' : numpy.float64, 'Z_MAX' : numpy.float64, 'Z_MIN' : numpy.float64,
'HEIGHT_MIN' : numpy.float32, 'HEIGHT_MAX' : numpy.float32, 
'ZENITH_MAX' : numpy.float64, 'ZENITH_MIN' : numpy.float64,
'RGB_FIELD' : bytes}
"Header fields have defined type in SPDV4"

HEADER_ARRAY_FIELDS = ('BANDWIDTHS', 'WAVELENGTHS', 'VERSION_SPD', 'VERSION_DATA', 'RGB_FIELD')
"fields in the header that are actually arrays"

PULSES_ESSENTIAL_FIELDS = ()
"Note: PULSE_ID, NUMBER_OF_RETURNS and PTS_START_IDX always created by pylidar"
POINTS_ESSENTIAL_FIELDS = ('X', 'Y', 'Z', 'CLASSIFICATION')
"RETURN_NUMBER always created by pylidar"
HEADER_ESSENTIAL_FIELDS = ('SPATIAL_REFERENCE', 'VERSION_DATA')
"VERSION_SPD always created by pylidar"

PULSE_FIELDS = {'PULSE_ID' : numpy.uint64, 'TIMESTAMP' : numpy.uint64,
'NUMBER_OF_RETURNS' : numpy.uint8, 'AZIMUTH' : numpy.uint32, 
'ZENITH' : numpy.uint32, 'SOURCE_ID' : numpy.uint16, 
'PULSE_WAVELENGTH_IDX' : numpy.uint8, 'NUMBER_OF_WAVEFORM_SAMPLES' : numpy.uint8,
'WFM_START_IDX' : numpy.uint64, 'PTS_START_IDX' : numpy.uint64, 
'SCANLINE' : numpy.uint32, 'SCANLINE_IDX' : numpy.uint16, 'X_IDX' : numpy.uint32,
'Y_IDX' : numpy.uint32, 'X_ORIGIN' : numpy.uint32, 'Y_ORIGIN' : numpy.uint32,
'Z_ORIGIN' : numpy.uint32, 'H_ORIGIN' : numpy.uint32, 'PULSE_FLAGS' : numpy.uint8,
'AMPLITUDE_PULSE' : numpy.uint16, 'WIDTH_PULSE' : numpy.uint16, 
'SCAN_ANGLE_RANK' : numpy.int16, 'PRISM_FACET' : numpy.uint8}
"Thes fields have defined type"

POINT_FIELDS = {'RANGE' : numpy.uint32, 'RETURN_NUMBER' : numpy.uint8,
'X' : numpy.uint32, 'Y' : numpy.uint32, 'Z' : numpy.uint32, 
'HEIGHT' : numpy.uint16, 'CLASSIFICATION' : numpy.uint8, 
'POINT_FLAGS' : numpy.uint8, 'INTENSITY' : numpy.uint16, 
'AMPLITUDE_RETURN' : numpy.uint16, 'WIDTH_RETURN' : numpy.uint16, 
'RED' : numpy.uint16, 'GREEN' : numpy.uint16, 'BLUE' : numpy.uint16, 
'NIR' : numpy.uint16, 'RHO_APP' : numpy.uint32, 'DEVIATION' : numpy.uint16,
'ECHO_TYPE' : numpy.uint16, 'POINT_WAVELENGTH_IDX' : numpy.uint8}
"Thes fields have defined type"

WAVEFORM_FIELDS = {'NUMBER_OF_WAVEFORM_RECEIVED_BINS' : numpy.uint16,
'NUMBER_OF_WAVEFORM_TRANSMITTED_BINS' : numpy.uint16, 
'RANGE_TO_WAVEFORM_START' : numpy.uint32, 'RECEIVED_START_IDX' : numpy.uint64,
'TRANSMITTED_START_IDX' : numpy.uint64, 'CHANNEL' : numpy.uint8,
'WAVEFORM_FLAGS' : numpy.uint8, 'WFM_WAVELENGTH_IDX' : numpy.uint8, 
'RECEIVE_WAVE_GAIN' : numpy.float32, 'RECEIVE_WAVE_OFFSET' :  numpy.float32,
'TRANS_WAVE_GAIN' : numpy.float32, 'TRANS_WAVE_OFFSET' : numpy.float32}
"Thes fields have defined type"

PULSE_SCALED_FIELDS = ('AZIMUTH', 'ZENITH', 'X_IDX', 'Y_IDX', 
'X_ORIGIN', 'Y_ORIGIN', 'Z_ORIGIN', 'H_ORIGIN', 'AMPLITUDE_PULSE', 
'WIDTH_PULSE')
"need scaling applied"
POINT_SCALED_FIELDS = ('X', 'Y', 'Z', 'HEIGHT', 'RANGE', 'INTENSITY', 
'AMPLITUDE_RETURN', 'WIDTH_RETURN')
"need scaling applied"
WAVEFORM_SCALED_FIELDS = ('RANGE_TO_WAVEFORM_START',)
"need scaling applied"

TRANSMITTED_DTYPE = numpy.uint32
"dtype for transmitted array"
RECEIVED_DTYPE = numpy.uint32
"dtype for received array"

GAIN_NAME = 'GAIN'
"For storing in hdf5 attributes"
OFFSET_NAME = 'OFFSET'
"For storing in hdf5 attributes"
NULL_NAME = 'NULL'
"For storing in hdf5 attributes"

SPDV4_INDEX_CARTESIAN = spdv4_index.SPDV4_INDEX_CARTESIAN
"types of indexing in the file"
SPDV4_INDEX_SPHERICAL = spdv4_index.SPDV4_INDEX_SPHERICAL
"types of indexing in the file"
SPDV4_INDEX_CYLINDRICAL = spdv4_index.SPDV4_INDEX_CYLINDRICAL
"types of indexing in the file"
SPDV4_INDEX_POLAR = spdv4_index.SPDV4_INDEX_POLAR
"types of indexing in the file"
SPDV4_INDEX_SCAN = spdv4_index.SPDV4_INDEX_SCAN
"types of indexing in the file"

SPDV4_PULSE_INDEX_FIRST_RETURN = 0
"pulse indexing methods"
SPDV4_PULSE_INDEX_LAST_RETURN = 1
"pulse indexing methods"
SPDV4_PULSE_INDEX_START_WAVEFORM = 2
"pulse indexing methods"
SPDV4_PULSE_INDEX_END_WAVEFORM = 3
"pulse indexing methods"
SPDV4_PULSE_INDEX_ORIGIN = 4
"pulse indexing methods"
SPDV4_PULSE_INDEX_MAX_INTENSITY = 5
"pulse indexing methods"
SPDV4_PULSE_INDEX_GROUND = 6 # Reserved
"pulse indexing methods"
SPDV4_PULSE_INDEX_ZPLANE = 7 # Reserved
"pulse indexing methods"

SPDV4_INDEXTYPE_SIMPLEGRID = spdv4_index.SPDV4_INDEXTYPE_SIMPLEGRID
"types of spatial indices"

SPDV4_SIMPLEGRID_COUNT_DTYPE = spdv4_index.SPDV4_SIMPLEGRID_COUNT_DTYPE
"data types for the spatial index"
# NOTE(review): was previously assigned spdv4_index.SPDV4_SIMPLEGRID_COUNT_DTYPE,
# which looks like a copy-paste slip - the INDEX alias should mirror the
# INDEX dtype from spdv4_index, matching its COUNT counterpart above.
SPDV4_SIMPLEGRID_INDEX_DTYPE = spdv4_index.SPDV4_SIMPLEGRID_INDEX_DTYPE
"data types for the spatial index"

SPDV4_POINT_FLAGS_IGNORE = 1
"flags for POINT_FLAGS"
SPDV4_POINT_FLAGS_OVERLAP = 2
"flags for POINT_FLAGS"
SPDV4_POINT_FLAGS_SYNTHETIC = 4
"flags for POINT_FLAGS"
SPDV4_POINT_FLAGS_KEY_POINT = 8
"flags for POINT_FLAGS"
SPDV4_POINT_FLAGS_WAVEFORM = 16
"flags for POINT_FLAGS"

SPDV4_PULSE_FLAGS_IGNORE = 1
"flags for PULSE_FLAGS"
SPDV4_PULSE_FLAGS_OVERLAP = 2
"flags for PULSE_FLAGS"
SPDV4_PULSE_FLAGS_SCANLINE_DIRECTION = 4
"flags for PULSE_FLAGS"
SPDV4_PULSE_FLAGS_SCANLINE_EDGE = 8
"flags for PULSE_FLAGS"

SPDV4_WAVEFORM_FLAGS_IGNORE = 1
"flags for WAVEFORM_FLAGS"
SPDV4_WAVEFORM_FLAGS_SATURATION_FIXED = 2
"flags for WAVEFORM_FLAGS"
SPDV4_WAVEFORM_FLAGS_BASELINE_FIXED = 4
"flags for WAVEFORM_FLAGS"

SPDV4_VERSION_MAJOR = 4
"version - major"
SPDV4_VERSION_MINOR = 0
"version - minor"

POINTS_HEADER_UPDATE_DICT = {'X' : ('X_MIN', 'X_MAX'), 'Y' : ('Y_MIN', 'Y_MAX'),
        'Z' : ('Z_MIN', 'Z_MAX'), 'HEIGHT' : ('HEIGHT_MIN', 'HEIGHT_MAX')}
PULSES_HEADER_UPDATE_DICT = {'ZENITH' : ('ZENITH_MIN', 'ZENITH_MAX'),
        'AZIMUTH' : ('AZIMUTH_MIN', 'AZIMUTH_MAX'), 
        'SCANLINE_IDX' : ('SCANLINE_IDX_MIN', 'SCANLINE_IDX_MAX'),
        'SCANLINE' : ('SCANLINE_MIN', 'SCANLINE_MAX')}
WAVEFORMS_HEADER_UPDATE_DICT = {'RANGE' : ('RANGE_MIN', 'RANGE_MAX')}
"for updating the header"

SPDV4_CLASSIFICATION_UNDEFINED = 0
"classification codes"
SPDV4_CLASSIFICATION_UNCLASSIFIED = 1
"classification codes"
SPDV4_CLASSIFICATION_CREATED = 2
"classification codes"
SPDV4_CLASSIFICATION_GROUND = 3
"classification codes"
SPDV4_CLASSIFICATION_LOWVEGE = 4
"classification codes"
SPDV4_CLASSIFICATION_MEDVEGE = 5
"classification codes"
SPDV4_CLASSIFICATION_HIGHVEGE = 6
"classification codes"
SPDV4_CLASSIFICATION_BUILDING = 7
"classification codes"
SPDV4_CLASSIFICATION_WATER = 8
"classification codes"
SPDV4_CLASSIFICATION_TRUNK = 9
"classification codes"
SPDV4_CLASSIFICATION_FOLIAGE = 10
"classification codes"
SPDV4_CLASSIFICATION_BRANCH = 11
"classification codes"
SPDV4_CLASSIFICATION_WALL = 12
"classification codes"
SPDV4_CLASSIFICATION_RAIL = 13
"classification codes"

HEADER_TRANSLATION_DICT = {generic.HEADER_NUMBER_OF_POINTS : 'NUMBER_OF_POINTS'}
"Translation of header field names"

@jit
def flatten3dWaveformData(wavedata, inmask, nrecv, flattened):
    """
    Flatten a 3d transmitted or received array (from another driver) into
    the 1d form that SPDV4 stores.

    wavedata is the 3d transmitted or received array (bins, waves, pulses)
    inmask is the .mask for wavedata (True where invalid)
    nrecv is the output array for waveformInfo's
        NUMBER_OF_WAVEFORM_RECEIVED_BINS etc
    flattened is the output flattened version of wavedata
    """
    nBins = wavedata.shape[0]
    nWaves = wavedata.shape[1]
    nPulses = wavedata.shape[2]
    countOut = 0
    flatOut = 0
    for pulseIdx in range(nPulses):
        for waveIdx in range(nWaves):
            validBins = 0
            for binIdx in range(nBins):
                if not inmask[binIdx, waveIdx, pulseIdx]:
                    # unmasked element - copy it out
                    validBins += 1
                    flattened[flatOut] = wavedata[binIdx, waveIdx, pulseIdx]
                    flatOut += 1
            # only record a bin count for waveforms that had any data
            if validBins > 0:
                nrecv[countOut] = validBins
                countOut += 1
class SPDV4File(generic.LiDARFile):
    """
    Class to support reading and writing of SPD Version 4.x files.

    Uses h5py to handle access to the underlying HDF5 file.
    """
    def __init__(self, fname, mode, controls, userClass):
        """
        fname: path of the HDF5 file to open or create
        mode: one of generic.READ, generic.UPDATE, generic.CREATE
        controls: processing controls object (overlap, messageHandler etc)
        userClass: driver info object carrying lidarDriverOptions

        Raises generic.LiDARFormatNotUnderstood if the file cannot be
        opened or is not an SPDV4 file.
        """
        generic.LiDARFile.__init__(self, fname, mode, controls, userClass)

        # convert mode into h5py mode string
        if mode == generic.READ:
            h5py_mode = 'r'
        elif mode == generic.UPDATE:
            h5py_mode = 'r+'
        elif mode == generic.CREATE:
            h5py_mode = 'w'
        else:
            raise ValueError('Unknown value for mode parameter')

        # check driver options
        if mode == generic.READ:
            options = READSUPPORTEDOPTIONS
        else:
            options = WRITESUPPORTEDOPTIONS
        for key in userClass.lidarDriverOptions:
            if key not in options:
                msg = '%s not a supported SPDV4 option' % repr(key)
                raise generic.LiDARInvalidSetting(msg)

        # warn on scaling, but no data
        self.scalingButNoDataWarning = True
        if 'SCALING_BUT_NO_DATA_WARNING' in userClass.lidarDriverOptions:
            self.scalingButNoDataWarning = (
                userClass.lidarDriverOptions['SCALING_BUT_NO_DATA_WARNING'])

        # type of spatial index to use
        self.preferredSpatialIndex = SPDV4_INDEXTYPE_SIMPLEGRID
        # create index on update - only valid for more advanced indices
        self.createIndexOnUpdate = False

        # hdf5 chunk size - as a tuple - columns are 1d
        self.hdf5ChunkSize = (DEFAULT_HDF5_CHUNK_SIZE,)
        if 'HDF5_CHUNK_SIZE' in userClass.lidarDriverOptions:
            self.hdf5ChunkSize = (
                userClass.lidarDriverOptions['HDF5_CHUNK_SIZE'],)

        # attempt to open the file
        try:
            self.fileHandle = h5py.File(fname, h5py_mode)
        except (OSError, IOError) as err:
            # always seems to throw an OSError
            # found another one!
            raise generic.LiDARFormatNotUnderstood(str(err))

        # check that it is indeed the right version
        # and get attributes
        fileAttrs = self.fileHandle.attrs
        if mode == generic.READ or mode == generic.UPDATE:
            if not 'VERSION_SPD' in fileAttrs:
                msg = "File appears not to be SPD"
                raise generic.LiDARFormatNotUnderstood(msg)
            elif fileAttrs['VERSION_SPD'][0] != SPDV4_VERSION_MAJOR:
                msg = "File seems to be wrong version for this driver"
                raise generic.LiDARFormatNotUnderstood(msg)
        else:
            # make sure the attributes have the right names
            # first collect all the min and max names so we can
            # set the init vals appropriately
            minNames = []
            maxNames = []
            for updateDict in [POINTS_HEADER_UPDATE_DICT,
                    PULSES_HEADER_UPDATE_DICT, WAVEFORMS_HEADER_UPDATE_DICT]:
                for key in updateDict.keys():
                    minName, maxName = updateDict[key]
                    minNames.append(minName)
                    maxNames.append(maxName)

            for key in HEADER_FIELDS:
                cls = HEADER_FIELDS[key]
                # blank value - 0 for numbers, '' for strings
                # do check for min and max fields
                if key in HEADER_ARRAY_FIELDS:
                    fileAttrs[key] = numpy.array([cls()])
                else:
                    if cls == bytes:
                        # defaults to ''
                        fileAttrs[key] = cls()
                    else:
                        if numpy.issubdtype(cls, numpy.floating):
                            info = numpy.finfo(cls)
                        else:
                            info = numpy.iinfo(cls)
                        # set to the opposite so they get properly updated
                        if key in minNames:
                            # have to make it of type cls as it isn't by default
                            fileAttrs[key] = cls(info.max)
                        elif key in maxNames:
                            fileAttrs[key] = cls(info.min)
                        else:
                            # defaults to 0
                            fileAttrs[key] = cls()

            # write the GENERATING_SOFTWARE tag
            fileAttrs['GENERATING_SOFTWARE'] = generic.SOFTWARE_NAME

            # create the POINTS and PULSES groups
            data = self.fileHandle.create_group('DATA')
            data.create_group('POINTS')
            data.create_group('PULSES')
            data.create_group('WAVEFORMS')

        # Spatial Index
        self.si_handler = spdv4_index.SPDV4SpatialIndex.getHandlerForFile(
                self.fileHandle, mode, prefType=self.preferredSpatialIndex)

        # the following is for caching reads so we don't need to
        # keep re-reading each time the user asks. Also handy since
        # reading points requires pulses etc
        self.lastExtent = None
        self.lastPulseRange = None
        self.lastPoints = None
        self.lastPointsSpace = None
        self.lastPoints_Idx = None
        self.lastPoints_IdxMask = None
        self.lastPointsColumns = None
        self.lastPulses = None
        self.lastPulsesSpace = None
        self.lastPulses_Idx = None
        self.lastPulses_IdxMask = None
        self.lastPulsesColumns = None
        # h5space.H5Space
        self.lastWaveSpace = None
        self.lastWave_Idx = None
        self.lastWave_IdxMask = None
        # h5space.H5Space
        self.lastTransSpace = None
        # index to turn into 2d transbypulses
        self.lastTrans_Idx = None
        # mask for 2d transbypulses
        self.lastTrans_IdxMask = None
        # h5space.H5Space
        self.lastRecvSpace = None
        # index to turn into 2d recvbypulses
        self.lastRecv_Idx = None
        # mask for 2d recvbypulses
        self.lastRecv_IdxMask = None
        # flushed by close()
        self.pulseScalingValues = {}
        self.pointScalingValues = {}
        self.waveFormScalingValues = {}
        self.pulseNullValues = {}
        self.pointNullValues = {}
        self.waveFormNullValues = {}
        # handle dtypes for optional fields
        self.pulseDtypes = {}
        self.pointDtypes = {}
        self.waveFormDtypes = {}

        # the current extent or range for data being read
        self.extent = None
        self.pulseRange = None
        self.pixGrid = None
        self.extentAlignedWithSpatialIndex = True
        self.unalignedWarningGiven = False

        # for writing a new file, we generate PULSE_ID uniquely
        self.lastPulseID = numpy.uint64(0)

        # set up list for conversion of CLASSIFICATION column
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_CREATED,
                generic.CLASSIFICATION_CREATED))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_GROUND,
                generic.CLASSIFICATION_GROUND))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_LOWVEGE,
                generic.CLASSIFICATION_LOWVEGE))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_MEDVEGE,
                generic.CLASSIFICATION_MEDVEGE))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_HIGHVEGE,
                generic.CLASSIFICATION_HIGHVEGE))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_BUILDING,
                generic.CLASSIFICATION_BUILDING))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_WATER,
                generic.CLASSIFICATION_WATER))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_TRUNK,
                generic.CLASSIFICATION_TRUNK))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_FOLIAGE,
                generic.CLASSIFICATION_FOLIAGE))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_BRANCH,
                generic.CLASSIFICATION_BRANCH))
        self.classificationTranslation.append((SPDV4_CLASSIFICATION_RAIL,
                generic.CLASSIFICATION_RAIL))
[docs] @staticmethod def getDriverName(): """ Name of this driver """ return "SPDV4"
[docs] @staticmethod def getTranslationDict(arrayType): """ Translation dictionary between formats """ dict = {} if arrayType == generic.ARRAY_TYPE_POINTS: dict[generic.FIELD_POINTS_RETURN_NUMBER] = 'RETURN_NUMBER' elif arrayType == generic.ARRAY_TYPE_PULSES: dict[generic.FIELD_PULSES_TIMESTAMP] = 'TIMESTAMP' return dict
[docs] @staticmethod def getHeaderTranslationDict(): """ Return dictionary with non-standard header names """ return HEADER_TRANSLATION_DICT
[docs] def setExtent(self, extent): """ Set the extent for reading or writing """ if not self.hasSpatialIndex(): msg = 'Format has no spatial Index. Processing must be done non-spatially' raise generic.LiDARInvalidSetting(msg) self.extent = extent # need to check that the given extent is on the same grid as the # spatial index. If not a new spatial index will have to be calculated # for each block before we can access the data. totalPixGrid = self.getPixelGrid() extentPixGrid = pixelgrid.PixelGridDefn(xMin=extent.xMin, xMax=extent.xMax, yMin=extent.yMin, yMax=extent.yMax, xRes=extent.binSize, yRes=extent.binSize, projection=totalPixGrid.projection) self.extentAlignedWithSpatialIndex = (self.si_handler.canAccessUnaligned() or (extentPixGrid.alignedWith(totalPixGrid) and extent.binSize == totalPixGrid.xRes)) if (not self.extentAlignedWithSpatialIndex and not self.unalignedWarningGiven): msg = """Extent not on same grid or resolution as file. spatial index will be recomputed on the fly""" self.controls.messageHandler(msg, generic.MESSAGE_INFORMATION) self.unalignedWarningGiven = True
[docs] def getPixelGrid(self): """ Return the PixelGridDefn for this file """ if self.hasSpatialIndex(): pixGrid = copy.copy(self.si_handler.pixelGrid) else: # no spatial index - no pixgrid pixGrid = None return pixGrid
[docs] def setPixelGrid(self, pixGrid): """ Set the PixelGridDefn for the reading or writing """ if self.mode == generic.READ or self.mode == generic.UPDATE: msg = 'Can only set new pixel grid when creating' raise generic.LiDARInvalidData(msg) else: self.si_handler.createNewIndex(pixGrid) # cache it self.pixGrid = pixGrid
[docs] def hasSpatialIndex(self): """ Return True if we have a spatial index. """ return (self.si_handler is not None and self.si_handler.pixelGrid is not None)
[docs] def close(self): """ Close all open file handles """ if self.si_handler is not None: self.si_handler.close() # flush the scaling values if self.mode != generic.READ: pulsesHandle = self.fileHandle['DATA']['PULSES'] for colName in self.pulseScalingValues.keys(): gain, offset = self.pulseScalingValues[colName] if colName not in pulsesHandle: if self.scalingButNoDataWarning: msg = 'scaling set for column %s but no data written' msg = msg % colName self.controls.messageHandler(msg, generic.MESSAGE_INFORMATION) else: attrs = pulsesHandle[colName].attrs attrs[GAIN_NAME] = gain attrs[OFFSET_NAME] = offset pointsHandle = self.fileHandle['DATA']['POINTS'] for colName in self.pointScalingValues.keys(): gain, offset = self.pointScalingValues[colName] if colName not in pointsHandle: if self.scalingButNoDataWarning: msg = 'scaling set for column %s but no data written' msg = msg % colName self.controls.messageHandler(msg, generic.MESSAGE_INFORMATION) else: attrs = pointsHandle[colName].attrs attrs[GAIN_NAME] = gain attrs[OFFSET_NAME] = offset waveHandle = self.fileHandle['DATA']['WAVEFORMS'] for colName in self.waveFormScalingValues.keys(): gain, offset = self.waveFormScalingValues[colName] if colName not in waveHandle: if self.scalingButNoDataWarning: msg = 'scaling set for column %s but no data written' msg = msg % colName self.controls.messageHandler(msg, generic.MESSAGE_INFORMATION) else: attrs = waveHandle[colName].attrs attrs[GAIN_NAME] = gain attrs[OFFSET_NAME] = offset # and the null values for colName in self.pulseNullValues.keys(): value, scaled = self.pulseNullValues[colName] if scaled: gain, offset = self.getScaling(colName, generic.ARRAY_TYPE_PULSES) value = (value - offset) * gain handle = self.fileHandle['DATA']['PULSES'] if colName not in handle: if self.scalingButNoDataWarning: msg = 'null set for column %s but no data written' msg = msg % colName self.controls.messageHandler(msg, generic.MESSAGE_INFORMATION) else: attrs = handle[colName].attrs 
attrs[NULL_NAME] = value for colName in self.pointNullValues.keys(): value, scaled = self.pointNullValues[colName] if scaled: gain, offset = self.getScaling(colName, generic.ARRAY_TYPE_POINTS) value = (value - offset) * gain handle = self.fileHandle['DATA']['POINTS'] if colName not in handle: if self.scalingButNoDataWarning: msg = 'null set for column %s but no data written' msg = msg % colName self.controls.messageHandler(msg, generic.MESSAGE_INFORMATION) else: attrs = handle[colName].attrs attrs[NULL_NAME] = value for colName in self.waveFormNullValues.keys(): value, scaled = self.waveFormNullValues[colName] if scaled: gain, offset = self.getScaling(colName, generic.ARRAY_TYPE_WAVEFORMS) value = (value - offset) * gain handle = self.fileHandle['DATA']['WAVEFORMS'] if colName not in handle: if self.scalingButNoDataWarning: msg = 'null set for column %s but no data written' msg = msg % colName self.controls.messageHandler(msg, generic.MESSAGE_INFORMATION) else: attrs = handle[colName].attrs attrs[NULL_NAME] = value # write the version information headerArray = numpy.array([SPDV4_VERSION_MAJOR, SPDV4_VERSION_MINOR], HEADER_FIELDS['VERSION_SPD']) self.setHeaderValue('VERSION_SPD', headerArray) # close self.fileHandle.close() self.fileHandle = None self.lastExtent = None self.lastPoints = None self.lastPointsSpace = None self.lastPoints_Idx = None self.lastPoints_IdxMask = None self.lastPointsColumns = None self.lastPulses = None self.lastPulseRange = None self.lastPulsesSpace = None self.lastPulses_Idx = None self.lastPulses_IdxMask = None self.lastPulsesColumns = None self.lastWaveSpace = None self.lastWave_Idx = None self.lastWave_IdxMask = None self.lastTransSpace = None self.lastTrans_Idx = None self.lastTrans_IdxMask = None self.lastRecvSpace = None self.lastRecv_Idx = None self.lastRecv_IdxMask = None self.pulseScalingValues = None self.pointScalingValues = None self.extent = None self.pulseRange = None self.pixGrid = None self.pulseDtypes = None 
self.pointDtypes = None self.waveFormDtypes = None
[docs] @staticmethod def readFieldAndUnScale(handle, name, selection, unScaled=False): """ Given a h5py handle, field name and selection does any unscaling if asked (unScaled=False). """ attrs = handle[name].attrs data = selection.read(handle[name]) if not unScaled and GAIN_NAME in attrs and OFFSET_NAME in attrs: data = (data / attrs[GAIN_NAME]) + attrs[OFFSET_NAME] return data
[docs] @staticmethod def readFieldsAndUnScale(handle, colNames, selection): """ Given a list of column names returns a structured array of the data. If colNames is a string, a single un-structred array will be returned. It will work out of any of the column names end with '_U' and deal with them appropriately. selection should be a h5space.H5Space. """ if isinstance(colNames, str): if colNames not in handle: msg = 'column %s not found in file' % colNames raise generic.LiDARArrayColumnError(msg) unScaled = colNames.endswith('_U') if unScaled: colNames = colNames[:-2] data = SPDV4File.readFieldAndUnScale(handle, colNames, selection, unScaled) else: # create a blank structured array to read the data into dtypeList = [] # names in the HDF5 file ('_U' removed) hdfNameList = [] # Whether they are unscaled fields or not unScaledList = [] for name in colNames: if name not in handle: msg = 'column %s not found in file' % name raise generic.LiDARArrayColumnError(msg) unScaled = name.endswith('_U') hdfName = name attrs = handle[name].attrs hasScale = GAIN_NAME in attrs and OFFSET_NAME in attrs if hasScale and not unScaled: s = 'f8' else: if unScaled: hdfName = name[:-2] s = handle[hdfName].dtype.str dtypeList.append((str(name), s)) hdfNameList.append(hdfName) unScaledList.append(unScaled) numRecords = selection.getSelectionSize() data = numpy.empty(numRecords, dtypeList) for name, hdfName, unScaled in zip(colNames, hdfNameList, unScaledList): field = SPDV4File.readFieldAndUnScale(handle, hdfName, selection, unScaled) data[str(name)] = field return data
[docs] def readPointsForExtent(self, colNames=None): """ Read all the points within the given extent as 1d structured array. colNames can be a name or list of column names to return. By default all columns are returned. """ pointsHandle = self.fileHandle['DATA']['POINTS'] if colNames is None: # get all names colNames = pointsHandle.keys() # returned cached if possible if (self.lastExtent is not None and self.lastExtent == self.extent and self.lastPoints is not None and self.lastPointsColumns is not None): if self.lastPointsColumns == colNames: return self.lastPoints if (isinstance(colNames, str) and self.lastPoints.dtype.names is not None and colNames in self.lastPoints.dtype.names): return self.lastPoints[colNames] point_space, idx, mask_idx = ( self.si_handler.getPointsSpaceForExtent(self.extent, self.controls.overlap, self.extentAlignedWithSpatialIndex)) points = self.readFieldsAndUnScale(pointsHandle, colNames, point_space) # translate any classifications self.recodeClassification(points, generic.RECODE_TO_LAS, colNames) self.lastExtent = copy.copy(self.extent) self.lastPoints = points self.lastPointsSpace = point_space self.lastPoints_Idx = idx self.lastPoints_IdxMask = mask_idx self.lastPointsColumns = colNames return points
[docs] def readPulsesForExtent(self, colNames=None): """ Read all the pulses within the given extent as 1d structured array. colNames can be a name or list of column names to return. By default all columns are returned. """ pulsesHandle = self.fileHandle['DATA']['PULSES'] if colNames is None: # get all names colNames = pulsesHandle.keys() # returned cached if possible if (self.lastExtent is not None and self.lastExtent == self.extent and self.lastPulses is not None and self.lastPulsesColumns is not None): if self.lastPulsesColumns == colNames: return self.lastPulses if (isinstance(colNames, str) and self.lastPulses.dtype.names is not None and colNames in self.lastPulses.dtype.names): return self.lastPulses[colNames] pulse_space, idx, mask_idx = ( self.si_handler.getPulsesSpaceForExtent(self.extent, self.controls.overlap, self.extentAlignedWithSpatialIndex)) pulses = self.readFieldsAndUnScale(pulsesHandle, colNames, pulse_space) if not self.extentAlignedWithSpatialIndex: # need to recompute subset of spatial index to bins # are aligned with current extent # round() ok since points should already be on the grid, nasty # rounding errors propogated with ceil() nrows = int(numpy.round((self.extent.yMax - self.extent.yMin) / self.extent.binSize)) ncols = int(numpy.round((self.extent.xMax - self.extent.xMin) / self.extent.binSize)) nrows += (self.controls.overlap * 2) ncols += (self.controls.overlap * 2) x_idx = self.readFieldAndUnScale(pulsesHandle, self.si_handler.si_xPulseColName, pulse_space) y_idx = self.readFieldAndUnScale(pulsesHandle, self.si_handler.si_yPulseColName, pulse_space) mask, sortedbins, new_idx, new_cnt = gridindexutils.CreateSpatialIndex( y_idx, x_idx, self.extent.binSize, self.extent.yMax, self.extent.xMin, nrows, ncols, SPDV4_SIMPLEGRID_INDEX_DTYPE, SPDV4_SIMPLEGRID_COUNT_DTYPE) # ok calculate indices on new spatial indexes nOut = self.fileHandle['DATA']['PULSES']['PULSE_ID'].shape[0] pulse_space, idx, mask_idx = 
gridindexutils.convertSPDIdxToReadIdxAndMaskInfo( new_idx, new_cnt, nOut) # re-sort the pulses to match the new spatial index pulses = pulses[mask] pulses = pulses[sortedbins] self.lastExtent = copy.copy(self.extent) self.lastPulses = pulses self.lastPulsesSpace = pulse_space self.lastPulses_Idx = idx self.lastPulses_IdxMask = mask_idx self.lastPulsesColumns = colNames self.lastPoints = None # cache will now be out of date return pulses
[docs] def readPulsesForExtentByBins(self, extent=None, colNames=None): """ Return the pulses as a 3d structured masked array. """ # if they have given us a new extent then use that if extent is not None: oldExtent = self.lastExtent self.setExtent(extent) # go and get the pulses - should returned cached if # already got. pulses = self.readPulsesForExtent(colNames) # get these 'last' indices which map spatial index to pulses idx = self.lastPulses_Idx idxMask = self.lastPulses_IdxMask # re-map into 3d pulsesByBins = pulses[idx] # set extent back to the 'normal' one for this block # in case they call this again without the extent param if extent is not None: self.setExtent(oldExtent) # make masked array pulses = numpy.ma.array(pulsesByBins, mask=idxMask) return pulses
def readPointsForExtentByBins(self, extent=None, colNames=None, indexByPulse=False,
        returnPulseIndex=False):
    """
    Return the points as a 3d structured masked array.

    Note that because the spatial index on a SPDV4 file currently is on pulses
    this may miss points that are attached to pulses outside the current
    extent. If this is a problem then select an overlap large enough.

    Pass indexByPulse=True to bin the points by the locations of the pulses
    (using X_IDX and Y_IDX rather than the locations of the points). This is
    the default for non-cartesian indices.

    Pass returnPulseIndex=True to also return a masked 3d array of the indices
    into the 1d pulse array (as returned by readPulsesForExtent()).
    """
    # if they have given us a new extent then use that
    if extent is not None:
        oldExtent = self.lastExtent
        self.setExtent(extent)

    # have to spatially index the points
    # since SPDV4 files have only a spatial index on pulses currently.
    points = self.readPointsForExtent(colNames)

    # round() ok since points should already be on the grid, nasty
    # rounding errors propagated with ceil()
    nrows = int(numpy.round((self.lastExtent.yMax - self.lastExtent.yMin) /
        self.lastExtent.binSize))
    ncols = int(numpy.round((self.lastExtent.xMax - self.lastExtent.xMin) /
        self.lastExtent.binSize))
    # add overlap
    nrows += (self.controls.overlap * 2)
    ncols += (self.controls.overlap * 2)
    xMin = self.lastExtent.xMin - (self.controls.overlap * self.lastExtent.binSize)
    yMax = self.lastExtent.yMax + (self.controls.overlap * self.lastExtent.binSize)

    # create point spatial index
    if indexByPulse or self.si_handler.indexType != SPDV4_INDEX_CARTESIAN:
        # TODO: check if is there is a better way of going about this
        # in theory spatial index already exists but may be more work
        # it is worth to use
        # repeat each pulse's bin location for each of its returns
        x_idx = self.readPulsesForExtent(self.si_handler.si_xPulseColName)
        y_idx = self.readPulsesForExtent(self.si_handler.si_yPulseColName)
        nreturns = self.readPulsesForExtent('NUMBER_OF_RETURNS')
        x_idx = numpy.repeat(x_idx, nreturns)
        y_idx = numpy.repeat(y_idx, nreturns)
    else:
        x_idx = self.readPointsForExtent('X')
        y_idx = self.readPointsForExtent('Y')

    mask, sortedbins, idx, cnt = gridindexutils.CreateSpatialIndex(
        y_idx, x_idx, self.lastExtent.binSize, yMax, xMin, nrows, ncols,
        SPDV4_SIMPLEGRID_INDEX_DTYPE, SPDV4_SIMPLEGRID_COUNT_DTYPE)

    nOut = len(points)
    pts_space, pts_idx, pts_idx_mask = gridindexutils.convertSPDIdxToReadIdxAndMaskInfo(
        idx, cnt, nOut)

    # discard points outside the index, then order them by bin
    points = points[mask]
    sortedPoints = points[sortedbins]

    pointsByBins = sortedPoints[pts_idx]

    # set extent back to the 'normal' one for this block
    # in case they call this again without the extent param
    if extent is not None:
        self.setExtent(oldExtent)

    # stash the mapping so preparePointsForWriting can undo it on update
    self.lastPoints3d_Idx = pts_idx
    self.lastPoints3d_IdxMask = pts_idx_mask
    self.lastPoints3d_InRegionMask = mask
    self.lastPoints3d_InRegionSort = sortedbins

    points = numpy.ma.array(pointsByBins, mask=pts_idx_mask)

    if indexByPulse and returnPulseIndex:
        # have to generate array the same lengths as the 1d points
        # but containing the indexes of the pulses
        pulse_count = numpy.arange(0, nreturns.size)
        # convert this into an array with an element for each point
        pulse_idx_1d = numpy.repeat(pulse_count, nreturns)
        # mask the ones that are within the spatial index
        pulse_idx_1d = pulse_idx_1d[mask]
        # sort the right way
        sortedpulse_idx_1d = pulse_idx_1d[sortedbins]
        # turn into a 3d in the same way as the points themselves
        pulse_idx_3d = sortedpulse_idx_1d[pts_idx]

        # create a masked array
        pulse_idx_3dmask = numpy.ma.array(pulse_idx_3d, mask=pts_idx_mask)

        # return 2 things
        return points, pulse_idx_3dmask
    else:
        # just return the points
        return points
def readPointsByPulse(self, colNames=None):
    """
    Return a 2d masked structured array of point that matches the pulses.

    First axis is return number within the pulse, second axis is pulse;
    slots past a pulse's last return are masked. Returns None when there
    are no points to read.
    """
    # pull the flat (1d) points for the current block - the spatial and
    # non-spatial paths cache their pulse->point mapping the same way
    if self.controls.spatialProcessing:
        flatPoints = self.readPointsForExtent(colNames)
    else:
        flatPoints = self.readPointsForRange(colNames)
    pulseIdx = self.lastPoints_Idx
    pulseIdxMask = self.lastPoints_IdxMask
    if flatPoints is None:
        return None

    # fancy-index into (maxReturns, nPulses) and hide the padding entries
    return numpy.ma.array(flatPoints[pulseIdx], mask=pulseIdxMask)
def readWaveformInfo(self):
    """
    Return 2d masked array of information about the waveforms.

    First axis is waveform number, second is pulse. Returns None when the
    file has no waveform columns or the waveform index fields are absent.
    """
    if self.controls.spatialProcessing:
        try:
            # optional fields - fail if they don't exist
            idx = self.readPulsesForExtent('WFM_START_IDX')
            cnt = self.readPulsesForExtent('NUMBER_OF_WAVEFORM_SAMPLES')
        except generic.LiDARArrayColumnError:
            return None
    else:
        try:
            # optional fields - fail if they don't exist
            idx = self.readPulsesForRange('WFM_START_IDX')
            cnt = self.readPulsesForRange('NUMBER_OF_WAVEFORM_SAMPLES')
        except generic.LiDARArrayColumnError:
            return None

    waveHandle = self.fileHandle['DATA']['WAVEFORMS']
    colNames = waveHandle.keys()
    if len(colNames) == 0:
        # no waveform data in this file at all
        return None
    nOut = waveHandle['NUMBER_OF_WAVEFORM_RECEIVED_BINS'].shape[0]
    wave_space, wave_idx, wave_idx_mask = gridindexutils.convertSPDIdxToReadIdxAndMaskInfo(
        idx, cnt, nOut)
    waveformInfo = self.readFieldsAndUnScale(waveHandle, colNames, wave_space)
    # re-map 1d records into (nWaveforms, nPulses)
    waveformInfo = waveformInfo[wave_idx]
    wave_masked = numpy.ma.array(waveformInfo, mask=wave_idx_mask)

    self.lastWaveSpace = wave_space
    if self.mode == generic.UPDATE:
        # keep the mapping so prepareWaveformInfoForWriting can flatten again
        self.lastWave_Idx = wave_idx
        self.lastWave_IdxMask = wave_idx_mask

    return wave_masked
def readTransmitted(self):
    """
    Return the 3d masked integer array of transmitted for each of the
    current pulses.
    First axis is the waveform bin.
    Second axis is waveform number and last is pulse.

    Returns None when there is no waveform info or no TRANSMITTED dataset.
    Values are scaled to float32 using the per-waveform gain/offset.
    """
    waveformInfo = self.readWaveformInfo()
    if waveformInfo is None:
        return None

    if 'TRANSMITTED' not in self.fileHandle['DATA']:
        return None

    nOut = self.fileHandle['DATA']['TRANSMITTED'].shape[0]
    # NB: waveformInfo is masked
    idx = waveformInfo['TRANSMITTED_START_IDX'].data
    cnt = waveformInfo['NUMBER_OF_WAVEFORM_TRANSMITTED_BINS']
    # masked waveforms contribute zero bins
    cnt = cnt.filled(0)

    trans_shape, trans_idx, trans_idx_mask = gridindexutils.convertSPDIdxToReadIdxAndMaskInfo(
        idx, cnt, nOut)

    trans = trans_shape.read(self.fileHandle['DATA']['TRANSMITTED'])
    # so we can apply scaling, below
    trans = trans.astype(numpy.float32)
    trans = trans[trans_idx]

    # apply gain and offset
    for waveform in range(waveformInfo.shape[0]):
        offset = waveformInfo[waveform]['TRANS_WAVE_OFFSET']
        gain = waveformInfo[waveform]['TRANS_WAVE_GAIN']
        trans[:,waveform] = (trans[:,waveform] / gain) + offset

    self.lastTransSpace = trans_shape
    if self.mode == generic.UPDATE:
        # keep mapping for prepareTransmittedForWriting
        self.lastTrans_Idx = trans_idx
        self.lastTrans_IdxMask = trans_idx_mask

    # create masked array
    trans = numpy.ma.array(trans, mask=trans_idx_mask)
    return trans
def readReceived(self):
    """
    Return the 3d masked integer array of received for each of the
    current pulses.
    First axis is the waveform bin.
    Second axis is waveform number and last is pulse.

    Returns None when there is no waveform info or no RECEIVED dataset.
    Values are scaled to float32 using the per-waveform gain/offset.
    """
    waveformInfo = self.readWaveformInfo()
    if waveformInfo is None:
        return None

    if 'RECEIVED' not in self.fileHandle['DATA']:
        return None

    nOut = self.fileHandle['DATA']['RECEIVED'].shape[0]
    # NB: waveformInfo is masked
    idx = waveformInfo['RECEIVED_START_IDX'].data
    cnt = waveformInfo['NUMBER_OF_WAVEFORM_RECEIVED_BINS']
    # masked waveforms contribute zero bins
    cnt = cnt.filled(0)

    recv_shape, recv_idx, recv_idx_mask = gridindexutils.convertSPDIdxToReadIdxAndMaskInfo(
        idx, cnt, nOut)

    recv = recv_shape.read(self.fileHandle['DATA']['RECEIVED'])
    # so we can apply scaling, below
    recv = recv.astype(numpy.float32)
    recv = recv[recv_idx]

    # apply gain and offset
    for waveform in range(waveformInfo.shape[0]):
        offset = waveformInfo[waveform]['RECEIVE_WAVE_OFFSET']
        gain = waveformInfo[waveform]['RECEIVE_WAVE_GAIN']
        recv[:,waveform] = (recv[:,waveform] / gain) + offset

    self.lastRecvSpace = recv_shape
    if self.mode == generic.UPDATE:
        # keep mapping for prepareReceivedForWriting
        self.lastRecv_Idx = recv_idx
        self.lastRecv_IdxMask = recv_idx_mask

    # create masked array
    recv = numpy.ma.array(recv, mask=recv_idx_mask)
    return recv
def preparePulsesForWriting(self, pulses):
    """
    Called from writeData(). Massages what the user has passed into
    something we can write back to the file.

    Returns (pulses, mask, binidx): the flattened 1d pulses to write, a
    boolean mask of pulses inside the block (sans overlap, or None when
    not spatial), and the bin sort index from the spatial index handler
    (None except on spatial create).
    """
    if pulses.size == 0:
        return None, None, None

    mask = None
    binidx = None
    if pulses.ndim == 3:
        # must flatten back to be 1d using the indexes
        # used to create the 3d version (pulsesbybin)
        if self.mode == generic.UPDATE:
            flatSize = self.lastPulses_Idx.max() + 1
            flatPulses = numpy.empty((flatSize,), dtype=pulses.data.dtype)
            gridindexutils.flatten3dMaskedArray(flatPulses, pulses,
                self.lastPulses_IdxMask, self.lastPulses_Idx)
            pulses = flatPulses
        else:
            # TODO: flatten somehow
            raise NotImplementedError()

    if pulses.ndim != 1:
        msg = 'Pulse array must be either 1d or 3d'
        raise generic.LiDARInvalidSetting(msg)

    # NOTE: on update now we just don't write the X_IDX
    # and Y_IDX fields since they shouldn't be updated.
    # SPDV3 gives you a warning if you change these fields
    # in SPDV4 the updates are silently lost
    # TODO: surely we can do better than this?

    # essential fields exist?
    if self.mode == generic.CREATE:
        for essential in PULSES_ESSENTIAL_FIELDS:
            if essential not in pulses.dtype.names:
                msg = ('Essential field %s must exist in pulse data ' +
                    'when writing new file') % essential
                raise generic.LiDARInvalidData(msg)

    if self.extent is not None and self.controls.spatialProcessing:
        if self.mode == generic.UPDATE:
            # while we are at it, grab the X_IDX and Y_IDX fields since
            # they are essential
            x_idx = self.readPulsesForExtent(self.si_handler.si_xPulseColName)
            y_idx = self.readPulsesForExtent(self.si_handler.si_yPulseColName)
            # if we doing spatial processing we need to strip out areas in the overlap
            # self.extent is the size of the block without the overlap
            # so just strip out everything outside of it
            mask = ((x_idx >= self.extent.xMin) &
                (x_idx < self.extent.xMax) &
                (y_idx > self.extent.yMin) &
                (y_idx <= self.extent.yMax))
            pulses = pulses[mask]
            self.lastPulsesSpace.updateBoolArray(mask)
        else:
            # create
            # get the spatial index handling code to sort it.
            pulses, mask, binidx = self.si_handler.setPulsesForExtent(
                self.extent, pulses, self.lastPulseID,
                self.extentAlignedWithSpatialIndex)
    elif (self.mode == generic.UPDATE and self.createIndexOnUpdate and
            self.si_handler.canUpdateInPlace()):
        # allow the spatial index to be created in place
        self.si_handler.setPulsesForExtent(self.extent, pulses,
            self.lastPulsesSpace, self.extentAlignedWithSpatialIndex)

    return pulses, mask, binidx
def preparePointsForWriting(self, points, mask):
    """
    Called from writeData(). Massages what the user has passed into
    something we can write back to the file.

    points may be 1d, 2d (by pulse) or 3d (by bin); mask is the pulse
    in-block mask from preparePulsesForWriting (or None).

    Returns (points, pts_start, nreturns, returnNumber): the flat 1d
    points plus, on create from 2d input, the generated PTS_START_IDX,
    NUMBER_OF_RETURNS and RETURN_NUMBER columns (otherwise None).
    """
    pts_start = None
    nreturns = None
    returnNumber = None

    if points.size == 0:
        return None, pts_start, nreturns, returnNumber

    origPointsDims = points.ndim

    if points.ndim == 3:
        # must flatten back to be 1d using the indexes
        # used to create the 3d version (pointsbybin)
        if self.mode == generic.UPDATE:
            flatSize = self.lastPoints3d_Idx.max() + 1
            flatPoints = numpy.empty((flatSize,), dtype=points.data.dtype)
            gridindexutils.flatten3dMaskedArray(flatPoints, points,
                self.lastPoints3d_IdxMask, self.lastPoints3d_Idx)
            points = flatPoints
        else:
            # TODO: flatten somehow
            raise NotImplementedError()

    if points.ndim == 2:
        # must flatten back to be 1d using the indexes
        # used to create the 2d version (pointsbypulses)
        if self.mode == generic.UPDATE:
            flatSize = self.lastPoints_Idx.max() + 1
            flatPoints = numpy.empty((flatSize,), dtype=points.data.dtype)
            gridindexutils.flatten2dMaskedArray(flatPoints, points,
                self.lastPoints_IdxMask, self.lastPoints_Idx)
            points = flatPoints
        else:
            # get the number of returns for each pulse
            # this doesn't work with structured arrays so need
            # to use one of the fields
            firstField = points.dtype.names[0]

            nreturns = points[firstField].count(axis=0)
            pointsHandle = self.fileHandle['DATA']['POINTS']
            currPointsCount = 0
            if firstField in pointsHandle:
                currPointsCount = pointsHandle[firstField].shape[0]

            # cumsum gives us the end of the points
            # so need roll to move to the start
            # then add the current size of the data written
            pts_start = numpy.cumsum(nreturns)
            pts_start = numpy.roll(pts_start, 1)
            if pts_start.size > 0:
                pts_start[0] = 0
            pts_start += currPointsCount

            # unfortunately points.compressed() doesn't work
            # for structured arrays. Use our own version instead
            ptsCount = points[firstField].count()
            outPoints = numpy.empty(ptsCount, dtype=points.data.dtype)
            returnNumber = numpy.empty(ptsCount,
                dtype=POINT_FIELDS['RETURN_NUMBER'])

            gridindexutils.flattenMaskedStructuredArray(points.data,
                points[firstField].mask, outPoints, returnNumber)

            points = outPoints

    if points.ndim != 1:
        msg = 'Point array must be either 1d, 2 or 3d'
        raise generic.LiDARInvalidData(msg)

    if self.mode == generic.UPDATE:
        if self.controls.spatialProcessing:
            # expand the pulse-level mask to one element per point
            nreturns = self.readPulsesForExtent('NUMBER_OF_RETURNS')
            mask = numpy.repeat(mask, nreturns)
            if origPointsDims == 3:
                # was pointsbybins, now 1d.
                # undo the bin sort applied by readPointsForExtentByBins
                sortedPointsundo = numpy.empty_like(points)
                gridindexutils.unsortArray(points,
                    self.lastPoints3d_InRegionSort, sortedPointsundo)
                points = sortedPointsundo

                mask = mask[self.lastPoints3d_InRegionMask]
                # we didn't do this in writeData, but we can do it now
                points = points[mask]
                self.lastPointsSpace.updateBoolArray(self.lastPoints3d_InRegionMask)
            else:
                self.lastPointsSpace.updateBoolArray(mask)
    else:
        # need to check that passed in data has all the required fields
        for essential in POINTS_ESSENTIAL_FIELDS:
            if essential not in points.dtype.names:
                msg = ('Essential field %s must exist in point data ' +
                    'when writing new file') % essential
                raise generic.LiDARInvalidData(msg)

        # transparently convert the IGNORE column (if it exists)
        # into the POINT_FLAGS
        if 'IGNORE' in points.dtype.names:
            ignoreField = points['IGNORE']
            # create a new array without IGNORE
            newDType = []
            for name in points.dtype.names:
                if name != 'IGNORE':
                    s = points.dtype[name].str
                    newDType.append((name, s))

            # ensure it has POINT_FLAGS
            newFlags = numpy.where(points['IGNORE'] != 0,
                SPDV4_POINT_FLAGS_IGNORE, 0)
            if 'POINT_FLAGS' not in points.dtype.names:
                s = numpy.dtype(POINT_FIELDS['POINT_FLAGS']).str
                newDType.append(('POINT_FLAGS', s))
            else:
                # combine
                newFlags = newFlags | points['POINT_FLAGS']

            # create it
            newpoints = numpy.empty(points.shape, dtype=newDType)

            # copy all the old fields accross
            for name in points.dtype.names:
                if name != 'IGNORE' and name != 'POINT_FLAGS':
                    newpoints[name] = points[name]

            # make sure it has the updated 'POINT_FLAGS'
            newpoints['POINT_FLAGS'] = newFlags

            points = newpoints

    # translate any classifications
    self.recodeClassification(points, generic.RECODE_TO_DRIVER)

    return points, pts_start, nreturns, returnNumber
def prepareTransmittedForWriting(self, transmitted, waveformInfo):
    """
    Called from writeData(). Massages what the user has passed into
    something we can write back to the file.

    Un-scales the 3d masked transmitted data back to DN using the
    per-waveform gain/offset, then flattens to 1d. Returns
    (transmitted, trans_start, ntrans); the latter two are only
    generated on create (None on update).
    """
    if transmitted.size == 0:
        return None, None, None

    if transmitted.ndim != 3:
        msg = 'transmitted data must be 3d'
        raise generic.LiDARInvalidData(msg)

    trans_start = None
    ntrans = None

    # un scale back to DN
    dntransmitted = numpy.empty(transmitted.shape, TRANSMITTED_DTYPE)
    for waveform in range(waveformInfo.shape[0]):
        offset = waveformInfo[waveform]['TRANS_WAVE_OFFSET']
        gain = waveformInfo[waveform]['TRANS_WAVE_GAIN']
        dntransmitted[:,waveform] = (transmitted[:,waveform] - offset) * gain

    transmitted = numpy.ma.MaskedArray(dntransmitted, mask=transmitted.mask)

    if self.mode == generic.UPDATE:
        origShape = transmitted.shape

        # flatten it back to 1d so it can be written
        flatSize = self.lastTrans_Idx.max() + 1
        flatTrans = numpy.empty((flatSize,), dtype=transmitted.data.dtype)
        gridindexutils.flatten3dMaskedArray(flatTrans, transmitted,
            self.lastTrans_IdxMask, self.lastTrans_Idx)
        transmitted = flatTrans

    else:
        # create arrays for flatten3dWaveformData
        firstField = waveformInfo.dtype.names[0]
        ntrans = numpy.zeros(waveformInfo[firstField].count(), dtype=numpy.uint16)
        flattened = numpy.empty(transmitted.count(), dtype=transmitted.dtype)

        flatten3dWaveformData(transmitted.data, transmitted.mask, ntrans, flattened)

        currTransCount = 0
        if 'TRANSMITTED' in self.fileHandle['DATA']:
            transHandle = self.fileHandle['DATA']['TRANSMITTED']
            currTransCount = transHandle.shape[0]

        # cumsum gives the ends; roll + zero the first gives the starts
        trans_start = numpy.cumsum(ntrans)
        trans_start = numpy.roll(trans_start, 1)
        if trans_start.size > 0:
            trans_start[0] = 0
        trans_start += currTransCount

        transmitted = flattened

    return transmitted, trans_start, ntrans
def prepareReceivedForWriting(self, received, waveformInfo):
    """
    Called from writeData(). Massages what the user has passed into
    something we can write back to the file.

    Un-scales the 3d masked received data back to DN using the
    per-waveform gain/offset, then flattens to 1d. Returns
    (received, recv_start, nrecv); the latter two are only generated
    on create (None on update).
    """
    if received.size == 0:
        return None, None, None

    if received.ndim != 3:
        msg = 'received data must be 3d'
        raise generic.LiDARInvalidData(msg)

    recv_start = None
    nrecv = None

    # un scale back to DN
    dnreceived = numpy.empty(received.shape, RECEIVED_DTYPE)
    for waveform in range(waveformInfo.shape[0]):
        offset = waveformInfo[waveform]['RECEIVE_WAVE_OFFSET']
        gain = waveformInfo[waveform]['RECEIVE_WAVE_GAIN']
        dnreceived[:,waveform] = (received[:,waveform] - offset) * gain

    received = numpy.ma.MaskedArray(dnreceived, mask=received.mask)

    if self.mode == generic.UPDATE:
        origShape = received.shape

        # flatten it back to 1d so it can be written
        flatSize = self.lastRecv_Idx.max() + 1
        flatRecv = numpy.empty((flatSize,), dtype=received.data.dtype)
        gridindexutils.flatten3dMaskedArray(flatRecv, received,
            self.lastRecv_IdxMask, self.lastRecv_Idx)
        received = flatRecv

    else:
        # create arrays for flatten3dWaveformData
        firstField = waveformInfo.dtype.names[0]
        nrecv = numpy.zeros(waveformInfo[firstField].count(), dtype=numpy.uint16)
        flattened = numpy.empty(received.count(), dtype=received.dtype)

        flatten3dWaveformData(received.data, received.mask, nrecv, flattened)

        currRecvCount = 0
        if 'RECEIVED' in self.fileHandle['DATA']:
            recvHandle = self.fileHandle['DATA']['RECEIVED']
            currRecvCount = recvHandle.shape[0]

        # cumsum gives the ends; roll + zero the first gives the starts
        recv_start = numpy.cumsum(nrecv)
        recv_start = numpy.roll(recv_start, 1)
        if recv_start.size > 0:
            recv_start[0] = 0
        recv_start += currRecvCount

        received = flattened

    return received, recv_start, nrecv
def prepareWaveformInfoForWriting(self, waveformInfo):
    """
    Flattens the waveformInfo back out so it can be written.

    Returns (outWave, wfm_start, nwaveforms): the flat 1d waveform
    records plus the generated WFM_START_IDX and
    NUMBER_OF_WAVEFORM_SAMPLES columns.
    """
    firstField = waveformInfo.dtype.names[0]

    # number of unmasked waveforms per pulse
    nwaveforms = waveformInfo[firstField].count(axis=0)

    waveHandle = self.fileHandle['DATA']['WAVEFORMS']
    currWaveformsCount = 0
    if firstField in waveHandle:
        currWaveformsCount = waveHandle[firstField].shape[0]

    # cumsum gives us the end of the points
    # so need roll to move to the start
    # then add the current size of the data written
    wfm_start = numpy.cumsum(nwaveforms)
    wfm_start = numpy.roll(wfm_start, 1)
    if wfm_start.size > 0:
        wfm_start[0] = 0
    wfm_start += currWaveformsCount

    if self.mode == generic.UPDATE:
        # flatten it back to 1d so it can be written
        flatSize = self.lastWave_Idx.max() + 1
        outWave = numpy.empty((flatSize,), dtype=waveformInfo.data.dtype)
        gridindexutils.flatten3dMaskedArray(outWave, waveformInfo,
            self.lastWave_IdxMask, self.lastWave_Idx)
    else:
        # unfortunately points.compressed() doesn't work
        # for structured arrays. Use our own version instead
        waveCount = waveformInfo[firstField].count()
        outWave = numpy.empty(waveCount, dtype=waveformInfo.data.dtype)

        # we don't actually need this, but need to provide it
        returnNumber = numpy.empty(waveCount, dtype=numpy.uint32)

        gridindexutils.flattenMaskedStructuredArray(waveformInfo.data,
            waveformInfo[firstField].mask, outWave, returnNumber)

    return outWave, wfm_start, nwaveforms
def createDataColumn(self, groupHandle, name, data):
    """
    Create a new dataset called name under groupHandle and write data to it.

    The dataset takes its dtype from data, uses gzip compression with the
    shuffle filter, and has an unlimited max length so it can be appended
    to later. The chunk size is self.hdf5ChunkSize, which can be overridden
    via the driver options.
    """
    # dataset creation parameters follow what SPDLib uses
    creationArgs = dict(chunks=self.hdf5ChunkSize, dtype=data.dtype,
        shuffle=True, compression="gzip", compression_opts=1,
        maxshape=(None,))
    newDataset = groupHandle.create_dataset(name, data.shape, **creationArgs)
    newDataset[:] = data
def prepareDataForWriting(self, data, name, arrayType):
    """
    Prepares data for writing to a field.

    arrayType is one of the ARRAY_TYPE values from .generic.

    Does unscaling if possible unless name ends with '_U'.
    Raises exception if column needs to have scale and offset
    defined, but aren't.

    Returns the data to write, plus the 'hdfname' which
    is the field name the data should be written to. This has the
    '_U' removed.

    Raises generic.LiDARScalingError if the scaled data does not fit in
    the column's integer data type.
    """
    try:
        gain, offset = self.getScaling(name, arrayType)
        hasScaling = True
    except generic.LiDARArrayColumnError:
        hasScaling = False

    needsScaling = False
    dataType = None

    # look up whether this column must be scaled, and its on-disk dtype
    # (known columns come from the module tables, user columns from the
    # per-file dtype dictionaries)
    if arrayType == generic.ARRAY_TYPE_POINTS:
        if name in POINT_SCALED_FIELDS:
            needsScaling = True
        if name in POINT_FIELDS:
            dataType = POINT_FIELDS[name]
        elif name in self.pointDtypes:
            dataType = self.pointDtypes[name]
    elif arrayType == generic.ARRAY_TYPE_PULSES:
        if name in PULSE_SCALED_FIELDS:
            needsScaling = True
        if name in PULSE_FIELDS:
            dataType = PULSE_FIELDS[name]
        elif name in self.pulseDtypes:
            dataType = self.pulseDtypes[name]
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        if name in WAVEFORM_SCALED_FIELDS:
            needsScaling = True
        if name in WAVEFORM_FIELDS:
            dataType = WAVEFORM_FIELDS[name]
        elif name in self.waveFormDtypes:
            dataType = self.waveFormDtypes[name]

    # other array types we don't worry for now

    # unless of course, they are passing the unscaled stuff in
    hdfname = name
    if name.endswith('_U'):
        needsScaling = False
        hasScaling = False
        hdfname = name[:-2]

    if needsScaling and not hasScaling:
        msg = "field %s requires scaling to be set before writing" % name
        raise generic.LiDARArrayColumnError(msg)

    if hasScaling:
        data = (data - offset) * gain

    # cast to datatype if it has one
    if dataType is not None and numpy.issubdtype(dataType, numpy.integer):
        # check range
        info = numpy.iinfo(dataType)
        dataMin = data.min()
        if dataMin < info.min:
            msg = ('The data for field %s when scaled (%f) is less than ' +
                'the minimum for the data type (%d)') % (name, dataMin, info.min)
            raise generic.LiDARScalingError(msg)
        dataMax = data.max()
        if dataMax > info.max:
            msg = ('The data for field %s when scaled (%f) is greater than ' +
                'the maximum for the data type (%d)') % (name, dataMax, info.max)
            raise generic.LiDARScalingError(msg)

        data = numpy.around(data).astype(dataType)

    return data, hdfname
def writeStructuredArray(self, hdfHandle, structArray, generatedColumns, arrayType):
    """
    Writes a structured array as named datasets under hdfHandle. Also writes
    columns in dictionary generatedColumns to the same place.

    Only use for file creation. Existing datasets are resized and the new
    records appended; missing datasets are created via createDataColumn().
    """
    # use the first field to work out how much data is already present -
    # all columns are kept the same length
    firstField = structArray.dtype.names[0]
    if firstField in hdfHandle:
        oldSize = hdfHandle[firstField].shape[0]
    else:
        oldSize = 0
    newSize = oldSize + len(structArray)

    def _writeColumn(colData, colName):
        # unscale/cast and append one column
        data, hdfname = self.prepareDataForWriting(colData, colName, arrayType)
        if hdfname in hdfHandle:
            hdfHandle[hdfname].resize((newSize,))
            # slice end was newSize+1, which only worked because h5py
            # clamps out-of-range slices - use the exact extent instead
            hdfHandle[hdfname][oldSize:newSize] = data
        else:
            self.createDataColumn(hdfHandle, hdfname, data)

    for name in structArray.dtype.names:
        # don't bother writing out the ones we generate ourselves
        if name not in generatedColumns:
            _writeColumn(structArray[name], name)

    # now write the generated ones
    for name in generatedColumns.keys():
        _writeColumn(generatedColumns[name], name)
def writeData(self, pulses=None, points=None, transmitted=None,
        received=None, waveformInfo=None):
    """
    Write all the updated data. Pass None for data that do not need to
    be updated. It is assumed that each parameter has been read by the
    reading functions.

    On create the data is appended; on update it is written back through
    the 'last*Space' selections recorded by the matching read calls.
    """
    if self.mode == generic.READ:
        # the processor always calls this so if a reading driver just ignore
        return

    elif self.mode == generic.CREATE:
        # we only accept new data in a particular form so we can attach
        # points to pulses
        if pulses is None and points is None:
            msg = 'Must provide points and pulses when writing new data'
            raise generic.LiDARInvalidData(msg)

        if pulses.ndim != 1:
            msg = 'pulses must be 1d as returned from getPulses'
            raise generic.LiDARInvalidData(msg)
        if points is not None and points.ndim != 2:
            msg = 'points must be 2d as returned from getPointsByPulse'
            raise generic.LiDARInvalidData(msg)

        if (transmitted is not None or received is not None) and waveformInfo is None:
            msg = 'If transmitted or received is supplied, so must waveformInfo'
            raise generic.LiDARInvalidData(msg)

        if transmitted is not None and transmitted.ndim != 3:
            msg = 'transmitted must be 3d as returned by readTransmitted'
            raise generic.LiDARInvalidData(msg)
        if received is not None and received.ndim != 3:
            msg = 'received must be 3d as returned by readReceived'
            raise generic.LiDARInvalidData(msg)
        if waveformInfo is not None and waveformInfo.ndim != 2:
            msg = 'waveformInfo must be 2d as returned by readWaveformInfo'
            raise generic.LiDARInvalidData(msg)

    # only set when doing pulses
    mask = None
    binidx = None
    if (pulses is None and self.mode == generic.UPDATE and
            self.extent is not None and self.controls.spatialProcessing):
        # we need the mask so we can mask out points.
        # preparePulsesForWriting does this if there are pulses
        x_idx = self.readPulsesForExtent(self.si_handler.si_xPulseColName)
        y_idx = self.readPulsesForExtent(self.si_handler.si_yPulseColName)
        mask = ((x_idx >= self.extent.xMin) &
            (x_idx < self.extent.xMax) &
            (y_idx > self.extent.yMin) &
            (y_idx <= self.extent.yMax))

    if pulses is not None:
        pulses, mask, binidx = self.preparePulsesForWriting(pulses)

    # now we have the mask we can string the points outside the current extent.
    if mask is not None:
        # strip out the ones outside the current extent
        if points is not None:
            # slight problem with this approach is that we have to make
            # points 2d first (as returned by pointsbypulse etc)
            if points.ndim == 1:
                # readPoints
                points = points[self.lastPoints_Idx]
                points = numpy.ma.array(points, mask=self.lastPoints_IdxMask)
                # mask out outside block
                points = points[...,mask]
                if binidx is not None:
                    points = points[...,binidx]
            elif points.ndim == 3:
                # readPointsByBins
                # handled by preparePointsForWriting
                # as the points need to be re-ordered first
                pass
            else:
                # ndim == 2
                points = points[...,mask]
                if binidx is not None:
                    points = points[...,binidx]
        if waveformInfo is not None:
            waveformInfo = waveformInfo[...,mask]
            if binidx is not None:
                waveformInfo = waveformInfo[...,binidx]
        if received is not None:
            received = received[:,:,mask]
            if binidx is not None:
                received = received[:,:,binidx]
        if transmitted is not None:
            transmitted = transmitted[:,:,mask]
            if binidx is not None:
                transmitted = transmitted[:,:,binidx]

    if points is not None:
        points, pts_start, nreturns, returnNumber = (
            self.preparePointsForWriting(points, mask))

    trans_start = None
    ntrans = None
    if transmitted is not None:
        transmitted, trans_start, ntrans = (
            self.prepareTransmittedForWriting(transmitted, waveformInfo))

    recv_start = None
    nrecv = None
    if received is not None:
        received, recv_start, nrecv = (
            self.prepareReceivedForWriting(received, waveformInfo))

    # deal with situation where there is transmitted but not
    # received etc. Assume otherwise they will be the same length.
    if points is None and pulses is not None:
        pts_start = numpy.zeros_like(pulses, dtype=numpy.uint64)
        nreturns = numpy.zeros_like(pulses, dtype=numpy.uint8)

    if (recv_start is None and nrecv is None and trans_start is not None
            and ntrans is not None):
        recv_start = numpy.zeros_like(trans_start)
        nrecv = numpy.zeros_like(ntrans)

    if (trans_start is None and ntrans is None and recv_start is not None
            and nrecv is not None):
        trans_start = numpy.zeros_like(recv_start)
        ntrans = numpy.zeros_like(nrecv)

    if waveformInfo is not None:
        waveformInfo, wfm_start, nwaveforms = (
            self.prepareWaveformInfoForWriting(waveformInfo))

    if self.mode == generic.CREATE:
        if pulses is not None and len(pulses) > 0:
            pulsesHandle = self.fileHandle['DATA']['PULSES']

            # index into points and pulseid generated fields
            nPulses = len(pulses)
            pulseid = numpy.arange(self.lastPulseID,
                self.lastPulseID + nPulses, dtype=PULSE_FIELDS['PULSE_ID'])
            self.lastPulseID = self.lastPulseID + nPulses
            generatedColumns = {'PTS_START_IDX' : pts_start,
                'NUMBER_OF_RETURNS' : nreturns, 'PULSE_ID' : pulseid}

            if waveformInfo is not None and len(waveformInfo) > 0:
                # write extra columns
                generatedColumns['WFM_START_IDX'] = wfm_start
                generatedColumns['NUMBER_OF_WAVEFORM_SAMPLES'] = nwaveforms

            self.writeStructuredArray(pulsesHandle, pulses,
                generatedColumns, generic.ARRAY_TYPE_PULSES)

        if points is not None and len(points) > 0:
            pointsHandle = self.fileHandle['DATA']['POINTS']
            generatedColumns = {'RETURN_NUMBER' : returnNumber}

            self.writeStructuredArray(pointsHandle, points,
                generatedColumns, generic.ARRAY_TYPE_POINTS)

        if waveformInfo is not None and len(waveformInfo) > 0:
            waveHandle = self.fileHandle['DATA']['WAVEFORMS']
            generatedColumns = {'NUMBER_OF_WAVEFORM_RECEIVED_BINS' : nrecv,
                'NUMBER_OF_WAVEFORM_TRANSMITTED_BINS' : ntrans,
                'RECEIVED_START_IDX' : recv_start,
                'TRANSMITTED_START_IDX' : trans_start}

            self.writeStructuredArray(waveHandle, waveformInfo,
                generatedColumns, generic.ARRAY_TYPE_WAVEFORMS)

        if transmitted is not None and len(transmitted) > 0:
            if 'TRANSMITTED' in self.fileHandle['DATA']:
                tHandle = self.fileHandle['DATA']['TRANSMITTED']
                oldSize = tHandle.shape[0]
                newSize = oldSize + len(transmitted)
                tHandle.resize((newSize,))
                # NOTE(review): slice end newSize+1 relies on h5py clamping;
                # works, but newSize would be exact
                tHandle[oldSize:newSize+1] = transmitted
            else:
                self.createDataColumn(self.fileHandle['DATA'],
                    'TRANSMITTED', transmitted)

        if received is not None and len(received) > 0:
            if 'RECEIVED' in self.fileHandle['DATA']:
                rHandle = self.fileHandle['DATA']['RECEIVED']
                oldSize = rHandle.shape[0]
                newSize = oldSize + len(received)
                rHandle.resize((newSize,))
                # NOTE(review): same clamped-slice pattern as TRANSMITTED above
                rHandle[oldSize:newSize+1] = received
            else:
                self.createDataColumn(self.fileHandle['DATA'],
                    'RECEIVED', received)

    else:
        # TODO: should we be re-writing the generated columns??
        # Note: we can't use writeStructuredArray since that is for creation
        if points is not None:
            pointsHandle = self.fileHandle['DATA']['POINTS']
            for name in points.dtype.names:
                data, hdfname = self.prepareDataForWriting(
                    points[name], name, generic.ARRAY_TYPE_POINTS)
                if data.size > 0:
                    if hdfname in pointsHandle:
                        # get: Array must be C-contiguous
                        # without the copy
                        self.lastPointsSpace.write(pointsHandle[hdfname],
                            data.copy())
                    else:
                        self.createDataColumn(pointsHandle, hdfname, data)

        if pulses is not None:
            pulsesHandle = self.fileHandle['DATA']['PULSES']
            for name in pulses.dtype.names:
                # spatial index columns are never updated (see
                # preparePulsesForWriting for the rationale)
                if (name != 'X_IDX' and name != 'Y_IDX' and
                        name != 'X_IDX_U' and name != 'Y_IDX_U'):
                    data, hdfname = self.prepareDataForWriting(
                        pulses[name], name, generic.ARRAY_TYPE_PULSES)
                    if data.size > 0:
                        if hdfname in pulsesHandle:
                            # get: Array must be C-contiguous
                            # without the copy
                            self.lastPulsesSpace.write(pulsesHandle[hdfname],
                                data.copy())
                        else:
                            self.createDataColumn(pulsesHandle, hdfname, data)

        if waveformInfo is not None:
            waveHandle = self.fileHandle['DATA']['WAVEFORMS']
            for name in waveformInfo.dtype.names:
                data, hdfname = self.prepareDataForWriting(
                    waveformInfo[name], name, generic.ARRAY_TYPE_WAVEFORMS)
                if data.size > 0:
                    if hdfname in waveHandle:
                        self.lastWaveSpace.write(waveHandle[hdfname],
                            data.copy())
                    else:
                        self.createDataColumn(waveHandle, hdfname, data)

        if transmitted is not None:
            self.lastTransSpace.write(self.fileHandle['DATA']['TRANSMITTED'],
                transmitted)
        if received is not None:
            self.lastRecvSpace.write(self.fileHandle['DATA']['RECEIVED'],
                received)

    # update the header info
    self.updateHeaderFromData(points, pulses, waveformInfo)
def updateHeaderFromData(self, points, pulses, waveformInfo):
    """
    Given some data, updates the _MIN, _MAX etc header attributes, plus
    the NUMBER_OF_POINTS/PULSES/WAVEFORMS counts when creating.

    points, pulses and waveformInfo are the (possibly None) arrays that
    were just written by writeData().
    """
    # Pull out the types for NUMBER_OF_POINTS and NUMBER_OF_PULSES
    nPointsType = HEADER_FIELDS['NUMBER_OF_POINTS']
    nPulsesType = HEADER_FIELDS['NUMBER_OF_PULSES']

    if points is not None and points.size > 0:
        for key in POINTS_HEADER_UPDATE_DICT.keys():
            if key in points.dtype.names:
                minVal = points[key].min()
                maxVal = points[key].max()
                minKey, maxKey = POINTS_HEADER_UPDATE_DICT[key]
                if minVal < self.fileHandle.attrs[minKey]:
                    self.fileHandle.attrs[minKey] = minVal
                if maxVal > self.fileHandle.attrs[maxKey]:
                    self.fileHandle.attrs[maxKey] = maxVal

        # update the NUMBER_OF_POINTS field also
        # ensure these are saved back as the same type
        if self.mode == generic.CREATE:
            self.fileHandle.attrs['NUMBER_OF_POINTS'] += nPointsType(points.size)

    if pulses is not None and pulses.size > 0:
        for key in PULSES_HEADER_UPDATE_DICT.keys():
            if key in pulses.dtype.names:
                minVal = pulses[key].min()
                maxVal = pulses[key].max()
                minKey, maxKey = PULSES_HEADER_UPDATE_DICT[key]
                if minVal < self.fileHandle.attrs[minKey]:
                    self.fileHandle.attrs[minKey] = minVal
                if maxVal > self.fileHandle.attrs[maxKey]:
                    self.fileHandle.attrs[maxKey] = maxVal

        # update the NUMBER_OF_PULSES field also
        # ensure these are saved back as the same type
        if self.mode == generic.CREATE:
            self.fileHandle.attrs['NUMBER_OF_PULSES'] += nPulsesType(pulses.size)

    if waveformInfo is not None and waveformInfo.size > 0:
        for key in WAVEFORMS_HEADER_UPDATE_DICT.keys():
            if key in waveformInfo.dtype.names:
                # BUG FIX: these two lines previously read 'wavefromInfo'
                # (a typo) and raised NameError whenever a waveform
                # header-update column was present
                minVal = waveformInfo[key].min()
                maxVal = waveformInfo[key].max()
                minKey, maxKey = WAVEFORMS_HEADER_UPDATE_DICT[key]
                if minVal < self.fileHandle.attrs[minKey]:
                    self.fileHandle.attrs[minKey] = minVal
                if maxVal > self.fileHandle.attrs[maxKey]:
                    self.fileHandle.attrs[maxKey] = maxVal

        # update the NUMBER_OF_WAVEFORMS field also
        if self.mode == generic.CREATE:
            self.fileHandle.attrs['NUMBER_OF_WAVEFORMS'] += waveformInfo.size
# The functions below are for when there is no spatial index.
def setPulseRange(self, pulseRange):
    """
    Set the range of pulses to read.

    Returns False when the requested start is at or beyond the end of
    the file (nothing left to read), True otherwise. The end of the
    range is clipped to the total number of pulses when necessary.
    """
    # take a private copy so we can clip the values to the
    # actual amount of data in the file
    self.pulseRange = copy.copy(pulseRange)
    totalPulses = self.getTotalNumberPulses()

    if self.pulseRange.startPulse >= totalPulses:
        # past the end - no data to read
        self.pulseRange.startPulse = 0
        self.pulseRange.endPulse = 0
        return False

    if self.pulseRange.endPulse >= totalPulses:
        self.pulseRange.endPulse = totalPulses

    return True
def readPointsForRange(self, colNames=None):
    """
    Read all the points for the specified range of pulses.

    colNames may be None (all columns), a single column name, or a list
    of names. Results are cached; a repeated call with the same pulse
    range and columns returns the cached array. Returns None when the
    file contains no points.
    """
    pointsHandle = self.fileHandle['DATA']['POINTS']
    if colNames is None:
        # get all names
        colNames = pointsHandle.keys()

    if (self.lastPulseRange is not None and
            self.lastPulseRange == self.pulseRange and
            self.lastPoints is not None and
            self.lastPointsColumns is not None):
        # same range and same columns - return cached
        if colNames == self.lastPointsColumns:
            return self.lastPoints
        # asking for a single column we already have cached
        if (isinstance(colNames, str) and
                self.lastPoints.dtype.names is not None and
                colNames in self.lastPoints.dtype.names):
            return self.lastPoints[colNames]

    if (self.lastPulseRange is None or
            self.lastPulseRange != self.pulseRange or
            self.lastPointsSpace is None):
        # otherwise we can re-use the self.lastPointsSpace
        nReturns = self.readPulsesForRange('NUMBER_OF_RETURNS')
        startIdxs = self.readPulsesForRange('PTS_START_IDX')

        if 'RETURN_NUMBER' not in pointsHandle:
            # not much else we can do...
            # means to points were written to the file,
            # although there might be pulses
            # TODO: is this correct?
            return None

        nOut = pointsHandle['RETURN_NUMBER'].shape[0]
        point_space, point_idx, point_idx_mask = gridindexutils.convertSPDIdxToReadIdxAndMaskInfo(
            startIdxs, nReturns, nOut)

        # keep these indices from pulses to points - handy for the indexing
        # functions.
        self.lastPointsSpace = point_space
        self.lastPoints_Idx = point_idx
        self.lastPoints_IdxMask = point_idx_mask
        # print('new range')
    #else:
    #    print('reuse range')

    points = self.readFieldsAndUnScale(pointsHandle, colNames,
        self.lastPointsSpace)

    # translate any classifications
    self.recodeClassification(points, generic.RECODE_TO_LAS, colNames)

    self.lastPoints = points
    self.lastPointsColumns = colNames
    # self.lastPulseRange copied in readPulsesForRange()
    return points
def readPulsesForRange(self, colNames=None):
    """
    Read the specified range of pulses.

    colNames may be None (all columns), a single column name (returns a
    plain array for that column) or a sequence of names (returns a
    structured array).

    Results are cached: if the pulse range and requested columns match
    the previous call the cached array is returned. On a new range the
    cached point data is invalidated (see readPointsForRange).
    """
    pulsesHandle = self.fileHandle['DATA']['PULSES']
    if colNames is None:
        # get all names
        colNames = pulsesHandle.keys()

    # cache hit? Only valid if we are still on the same pulse range and
    # have a previous result to hand back.
    if (self.lastPulseRange is not None and
            self.lastPulseRange == self.pulseRange and
            self.lastPulses is not None and
            self.lastPulsesColumns is not None):
        if self.lastPulsesColumns == colNames:
            return self.lastPulses
        # a single requested column may be satisfied from a cached
        # structured array that contains it
        if (isinstance(colNames, str) and
                self.lastPulses.dtype.names is not None and
                colNames in self.lastPulses.dtype.names):
            return self.lastPulses[colNames]

    if (self.lastPulseRange is None or
            self.lastPulseRange != self.pulseRange or
            self.lastPulsesSpace is None):
        # new range - build an HDF5 selection for it.
        # PULSE_ID is always present so its length is the pulse count.
        nOut = pulsesHandle['PULSE_ID'].shape[0]
        space = h5space.createSpaceFromRange(self.pulseRange.startPulse,
                        self.pulseRange.endPulse, nOut)
        self.lastPulsesSpace = space
        self.lastPulseRange = copy.copy(self.pulseRange)
        self.lastPoints = None # now invalid
        self.lastPointsSpace = None

    pulses = self.readFieldsAndUnScale(pulsesHandle, colNames,
                    self.lastPulsesSpace)

    # update the cache for the next call
    self.lastPulses = pulses
    self.lastPulsesColumns = colNames
    return pulses
def getTotalNumberPulses(self):
    """
    Return the total number of pulses in the file.

    The length of the PULSE_ID column (always created when pulses are
    written) gives the count; 0 is returned when the file has no pulses.
    """
    pulses = self.fileHandle['DATA']['PULSES']
    try:
        return pulses['PULSE_ID'].shape[0]
    except KeyError:
        # no PULSE_ID column - no pulses were ever written
        return 0
def getHeader(self):
    """
    Return the header of the file.

    For SPDV4 the header is simply the HDF5 attributes stored on the
    file handle, so the live attribute mapping is returned directly.
    """
    header = self.fileHandle.attrs
    return header
def setHeader(self, newHeaderDict):
    """
    Update the file header with every key/value pair in newHeaderDict.

    Only valid when the file is open for update or create;
    generic.LiDARInvalidSetting is raised in read mode.
    """
    if self.mode == generic.READ:
        msg = 'Can only set header values on update or create'
        raise generic.LiDARInvalidSetting(msg)

    # header values live as HDF5 attributes on the file itself
    attrs = self.fileHandle.attrs
    for key, value in newHeaderDict.items():
        attrs[key] = value
def getHeaderValue(self, name):
    """
    Return a single header value given its name.

    Raises KeyError if the name is not present in the header.
    """
    # header values are stored as HDF5 attributes on the file
    return self.fileHandle.attrs[name]
def setHeaderValue(self, name, value):
    """
    Update a single value in the file header.

    Only valid when the file is open for update or create;
    generic.LiDARInvalidSetting is raised in read mode.
    """
    if self.mode == generic.READ:
        raise generic.LiDARInvalidSetting(
            'Can only set header values on update or create')

    # header values live as HDF5 attributes on the file itself
    self.fileHandle.attrs[name] = value
def setScaling(self, colName, arrayType, gain, offset):
    """
    Set the scaling (gain, offset) for the given column name.

    arrayType is one of the generic.ARRAY_TYPE_* constants. The values
    are cached and only written to the file when it is closed. Raises
    generic.LiDARInvalidSetting in read mode or for an unknown type.
    """
    if self.mode == generic.READ:
        raise generic.LiDARInvalidSetting(
            'Can only set scaling values on update or create')

    # pick the cache dictionary for this array type
    if arrayType == generic.ARRAY_TYPE_PULSES:
        scalingValues = self.pulseScalingValues
    elif arrayType == generic.ARRAY_TYPE_POINTS:
        scalingValues = self.pointScalingValues
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        scalingValues = self.waveFormScalingValues
    else:
        raise generic.LiDARInvalidSetting('Unsupported array type')

    scalingValues[colName] = (gain, offset)
def getScaling(self, colName, arrayType):
    """
    Returns the scaling (gain, offset) for the given column name.

    The in-memory cache is consulted first since pending values are only
    written to the file on close; otherwise the gain/offset attributes on
    the HDF5 column are used.

    Raises generic.LiDARArrayColumnError if no scaling (yet) set
    for this column, generic.LiDARInvalidSetting for an unknown type.
    """
    # map the array type to its cache and HDF5 group
    if arrayType == generic.ARRAY_TYPE_PULSES:
        cache = self.pulseScalingValues
        groupName = 'PULSES'
    elif arrayType == generic.ARRAY_TYPE_POINTS:
        cache = self.pointScalingValues
        groupName = 'POINTS'
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        cache = self.waveFormScalingValues
        groupName = 'WAVEFORMS'
    else:
        raise generic.LiDARInvalidSetting('Unsupported array type')

    # pending (unflushed) values take precedence over the file
    if colName in cache:
        return cache[colName]

    handle = self.fileHandle['DATA'][groupName]
    if colName in handle:
        attrs = handle[colName].attrs
        if GAIN_NAME in attrs and OFFSET_NAME in attrs:
            return attrs[GAIN_NAME], attrs[OFFSET_NAME]

    msg = 'gain and offset not found for column %s' % colName
    raise generic.LiDARArrayColumnError(msg)
def setNativeDataType(self, colName, arrayType, dtype):
    """
    Set the native dtype (numpy.int16 etc) that a column is stored
    as internally after scaling (if any) is applied.

    arrayType is one of the lidarprocessor.ARRAY_TYPE_* constants

    generic.LiDARArrayColumnError is raised if this cannot be set for the format.

    The default behaviour is to create new columns in the correct type for
    the format, or if they are optional, in the same type as the input array.
    """
    if self.mode == generic.READ:
        # NOTE: the previous message said 'scaling values' - a copy/paste
        # from setScaling(); this method sets data types.
        msg = 'Can only set data types on update or create'
        raise generic.LiDARInvalidSetting(msg)

    # The three array types behave identically: compulsory fields have a
    # fixed type that cannot be changed, anything else is recorded so the
    # column is created with the requested type when first written.
    if arrayType == generic.ARRAY_TYPE_PULSES:
        fixedFields = PULSE_FIELDS
        userDtypes = self.pulseDtypes
    elif arrayType == generic.ARRAY_TYPE_POINTS:
        fixedFields = POINT_FIELDS
        userDtypes = self.pointDtypes
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        fixedFields = WAVEFORM_FIELDS
        userDtypes = self.waveFormDtypes
    else:
        raise generic.LiDARInvalidSetting('Unsupported array type')

    if colName in fixedFields:
        if fixedFields[colName] != dtype:
            msg = 'Cannot change type for column %s' % colName
            raise generic.LiDARArrayColumnError(msg)
    else:
        userDtypes[colName] = dtype
def getNativeDataType(self, colName, arrayType):
    """
    Return the native dtype (numpy.int16 etc) that a column is stored
    as internally after scaling is applied. Provided so scaling can
    be adjusted when translating between formats.

    Note that for 'non essential' columns this will depend on the
    data type that the column was to begin with.

    arrayType is one of the lidarprocessor.ARRAY_TYPE_* constants
    """
    # compulsory fields have a known fixed type; optional columns are
    # looked up in the file itself
    if arrayType == generic.ARRAY_TYPE_PULSES:
        fixedFields = PULSE_FIELDS
        groupName = 'PULSES'
    elif arrayType == generic.ARRAY_TYPE_POINTS:
        fixedFields = POINT_FIELDS
        groupName = 'POINTS'
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        fixedFields = WAVEFORM_FIELDS
        groupName = 'WAVEFORMS'
    else:
        raise generic.LiDARInvalidSetting('Unsupported array type')

    if colName in fixedFields:
        return fixedFields[colName]

    handle = self.fileHandle['DATA'][groupName]
    if colName in handle:
        return handle[colName].dtype

    msg = 'Cannot find column %s' % colName
    raise generic.LiDARArrayColumnError(msg)
def setNullValue(self, colName, arrayType, value, scaled=True):
    """
    Sets the 'null' value for the given column.

    arrayType is one of the lidarprocessor.ARRAY_TYPE_* constants

    We don't write anything out at this stage as the scaling, if
    needed, mightn't be set at this stage. The work is done when the
    file is closed.
    """
    # pick the cache dictionary for this array type; the value is stored
    # along with whether it is in scaled or unscaled units
    if arrayType == generic.ARRAY_TYPE_PULSES:
        nullValues = self.pulseNullValues
    elif arrayType == generic.ARRAY_TYPE_POINTS:
        nullValues = self.pointNullValues
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        nullValues = self.waveFormNullValues
    else:
        raise generic.LiDARInvalidSetting('Unsupported array type')

    nullValues[colName] = (value, scaled)
def getNullValue(self, colName, arrayType, scaled=True):
    """
    Get the 'null' value for the given column.

    arrayType is one of the lidarprocessor.ARRAY_TYPE_* constants

    Pending values set via setNullValue() take precedence over the
    NULL attribute stored on the HDF5 column. The value is converted
    between scaled and unscaled units as required; the file always
    stores the null in unscaled (integer) units.
    """
    # handle stays None when the value comes from the in-memory cache
    handle = None
    if arrayType == generic.ARRAY_TYPE_PULSES:
        if colName in self.pulseNullValues:
            value, scaledStored = self.pulseNullValues[colName]
        else:
            handle = self.fileHandle['DATA']['PULSES']
    elif arrayType == generic.ARRAY_TYPE_POINTS:
        if colName in self.pointNullValues:
            value, scaledStored = self.pointNullValues[colName]
        else:
            handle = self.fileHandle['DATA']['POINTS']
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        if colName in self.waveFormNullValues:
            value, scaledStored = self.waveFormNullValues[colName]
        else:
            handle = self.fileHandle['DATA']['WAVEFORMS']
    else:
        raise generic.LiDARInvalidSetting('Unsupported array type')

    if handle is None:
        # value and scaledStored should be set
        if scaled == scaledStored:
            # cached in the units requested - return as-is
            return value
        else:
            # convert between unscaled (stored) and scaled (real-world)
            # units: scaled = unscaled / gain + offset and vice versa
            gain, offset = self.getScaling(colName, arrayType)
            if scaled:
                # they requested scaled, but we stored unscaled
                return (value / gain) + offset
            else:
                # they requested unscaled, but we stored scaled
                return (value - offset) * gain
    else:
        if colName in handle:
            attrs = handle[colName].attrs
            if NULL_NAME not in attrs:
                msg = 'Null value not set for column %s' % colName
                raise generic.LiDARArrayColumnError(msg)
            if scaled:
                # file stores the null unscaled - convert on the way out
                gain, offset = self.getScaling(colName, arrayType)
                return (attrs[NULL_NAME] / gain) + offset
            else:
                return attrs[NULL_NAME]
        else:
            msg = 'Cannot find column %s' % colName
            raise generic.LiDARArrayColumnError(msg)
def getScalingColumns(self, arrayType):
    """
    Return the names of the columns that have scaling: the compulsory
    scaled fields for the given array type first, followed by any extra
    columns the user has set scaling for.

    arrayType is one of the lidarprocessor.ARRAY_TYPE_* constants
    """
    if arrayType == generic.ARRAY_TYPE_PULSES:
        compulsory = PULSE_SCALED_FIELDS
        userScaling = self.pulseScalingValues
    elif arrayType == generic.ARRAY_TYPE_POINTS:
        compulsory = POINT_SCALED_FIELDS
        userScaling = self.pointScalingValues
    elif arrayType == generic.ARRAY_TYPE_WAVEFORMS:
        compulsory = WAVEFORM_SCALED_FIELDS
        userScaling = self.waveFormScalingValues
    else:
        raise generic.LiDARInvalidSetting('Unsupported array type')

    # compulsory ones first, then any user-set columns not already listed
    cols = list(compulsory)
    for col in userScaling.keys():
        if col not in cols:
            cols.append(col)
    return cols
class SPDV4FileInfo(generic.LiDARFileInfo):
    """
    Class that gets information about a SPDV4 file
    and makes it available as fields.
    """
    def __init__(self, fname):
        """
        Open fname read-only, extract the header, the pulse/point/waveform
        column names and whether a spatial index is present, then close
        the file again.

        Raises generic.LiDARFormatNotUnderstood if the file cannot be
        opened or is not an SPDV4 file.
        """
        generic.LiDARFileInfo.__init__(self, fname)

        # attempt to open the file
        try:
            fileHandle = h5py.File(fname, 'r')
        except (OSError, IOError) as err:
            # always seems to throw an OSError
            raise generic.LiDARFormatNotUnderstood(str(err))

        # make sure the handle is closed again on every path - previously
        # it was left open for the life of the object (resource leak),
        # including when the version checks below raised.
        try:
            # check that it is indeed the right version
            # and get attributes
            fileAttrs = fileHandle.attrs
            if not 'VERSION_SPD' in fileAttrs:
                msg = "File appears not to be SPD"
                raise generic.LiDARFormatNotUnderstood(msg)
            elif fileAttrs['VERSION_SPD'][0] != SPDV4_VERSION_MAJOR:
                msg = "File seems to be wrong version for this driver"
                raise generic.LiDARFormatNotUnderstood(msg)

            # save the header as a dictionary
            self.header = {}
            for key in fileHandle.attrs.keys():
                self.header[key] = fileHandle.attrs[key]

            # pull a few things out to the top level
            self.pulse_fields = [str(k) for k in fileHandle['DATA']['PULSES'].keys()]
            self.point_fields = [str(k) for k in fileHandle['DATA']['POINTS'].keys()]

            # always define waveform_fields so callers don't need hasattr();
            # previously it was missing when the file had no waveforms
            self.waveform_fields = []
            if 'WAVEFORMS' in fileHandle['DATA'].keys():
                self.waveform_fields = [str(k) for k in fileHandle['DATA']['WAVEFORMS'].keys()]

            si_handler = spdv4_index.SPDV4SpatialIndex.getHandlerForFile(
                    fileHandle, generic.READ)
            self.has_Spatial_Index = si_handler is not None
            # probably other things too
        finally:
            # all information has been copied out - release the file
            fileHandle.close()
[docs] @staticmethod def getDriverName(): """ Name of this driver """ return "SPDV4"
@staticmethod
def getHeaderTranslationDict():
    """
    Return the dictionary that maps non-standard header names for
    this format to the generic ones.
    """
    return HEADER_TRANSLATION_DICT