# Source code for pylidar.lidarformats.h5space

"""
Utilities that extend h5py's reading and writing of ranges of data,
specifically the ability to quickly deal with multiple ranges.
"""
# This file is part of PyLidar
# Copyright (C) 2015 John Armston, Pete Bunting, Neil Flood, Sam Gillingham
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function, division

import os
import sys
import numpy
import h5py
from numba import jit
import ctypes
from ctypes.util import find_library

# The numba compiled functions below call H5Sselect_hyperslab() from the
# HDF5 C library directly, so a ctypes handle to it is set up here.
# When the READTHEDOCS environment variable is 'True' the lookup is skipped
# and the name stays None, so that ReadTheDocs will still work.
H5Sselect_hyperslab = None
if os.getenv('READTHEDOCS', default='False') != 'True':
    # try each candidate library name in turn and keep the first one found
    for libname in ('hdf5', 'hdf5_serial'):
        hdf5lib = find_library(libname)
        if hdf5lib is not None:
            break
    else:
        # the loop ran to completion without a break - nothing was found
        raise IOError('HDF5 Library not found')

    HDF5_DLL = ctypes.CDLL(hdf5lib)
    H5Sselect_hyperslab = HDF5_DLL.H5Sselect_hyperslab
    # checked on 64 and 32 bits
    # arguments are: the space id, the selection operator, then four
    # pointers (start, stride, count and block in the HDF5 C API)
    H5Sselect_hyperslab.argtypes = (
        [ctypes.c_int64, ctypes.c_int] + [ctypes.c_void_p] * 4)
    H5Sselect_hyperslab.restype = ctypes.c_int

@jit
def convertBoolToHDF5Space(boolArray, boolStart, spaceid, start, count,
        select_hyperslab, selectSet, selectOr):
    """
    Select, in the HDF5 dataspace identified by spaceid, every contiguous
    run of True values found in boolArray.

    Each run is handed to select_hyperslab as a single hyperslab. The first
    call uses selectSet and every later call uses selectOr.

    * boolArray is the array of flags. Element 0 is always read, so it must
      contain at least one element.
    * boolStart is added to a position within boolArray to give the start
      value written for each hyperslab.
    * spaceid should be the .id of the h5.h5s.SpaceID object.
    * start and count should be empty length 1 numpy.uint64 arrays. They are
      overwritten here and their addresses are passed to select_hyperslab.
    * select_hyperslab should be the global H5Sselect_hyperslab.
    * selectSet and selectOr should be the value of h5py.h5s.SELECT_SET
      and h5py.h5s.SELECT_OR respectively.

    The value returned by select_hyperslab is not checked.
    """
    nVals = boolArray.shape[0]
    # the first hyperslab uses selectSet, any later ones use selectOr
    op = selectSet

    # assume a run begins at element 0. This is only used when boolArray[0]
    # is True - otherwise both values are replaced when a run really starts
    runStart = 0
    start[0] = boolStart

    previous = boolArray[0]
    for idx in range(1, nVals):
        current = boolArray[idx]
        if current == previous:
            # still inside the same run of Trues (or Falses)
            continue

        if current:
            # False -> True: a new run starts at this element
            runStart = idx
            start[0] = boolStart + idx
        else:
            # True -> False: the run finished on the previous element
            count[0] = idx - runStart
            select_hyperslab(spaceid, op, start.ctypes.data,
                        0, count.ctypes.data, 0)
            op = selectOr
        previous = current

    if boolArray[nVals - 1]:
        # the array finished while inside a run - select what is left
        count[0] = nVals - runStart
        if count[0] > 0:
            select_hyperslab(spaceid, op, start.ctypes.data,
                        0, count.ctypes.data, 0)


@jit
def updateFromBool(spaceid, boolStart, boolArray, mask, start, count,
                select_hyperslab, selectNotb):
    """
    Given a Space and the original boolArray, apply a mask that has one
    element for each True value in boolArray. Both boolArray and the Space
    are updated.

    * spaceid should be the .id of the h5.h5s.SpaceID object.
    * boolStart is added to a position within boolArray to give the start
      value written for each hyperslab.
    * boolArray is modified in place: each True element is overwritten with
      the matching element of mask. False elements are left alone.
    * mask is indexed once per True element of boolArray, in order, so it
      needs at least that many elements. This is not checked here.
    * start and count should be length 1 numpy.uint64 arrays. They are
      overwritten here and their addresses are passed to select_hyperslab.
    * select_hyperslab should be the global H5Sselect_hyperslab.
    * selectNotb should be h5py.h5s.SELECT_NOTB

    Each run of consecutive False values in mask is passed to
    select_hyperslab with selectNotb. A run spans from the boolArray position
    of its first False to the position where mask next becomes True (or just
    past the last True element of boolArray), so it can also cover elements
    of boolArray that were already False.

    Nothing is done if boolArray has no True elements, which includes the
    case of it being empty. The value returned by select_hyperslab is not
    checked.
    """
    nVals = boolArray.shape[0]
    # placeholder only - replaced at the first True element of boolArray
    state = False
    startRange = 0
    nM = 0 # index into mask
    lastN = 0 # position in boolArray of the most recent True element
    for n in range(nVals):
        # only consider when True in the original mask
        if boolArray[n]:
            # First True element - init state
            if nM == 0:
                state = mask[nM]
                startRange = n
                start[0] = boolStart + n
            # change of state
            elif state != mask[nM]:
                if mask[nM]:
                    # end of a range of Falses - pass it to select_hyperslab
                    count[0] = n - startRange
                    select_hyperslab(spaceid, selectNotb, start.ctypes.data,
                                0, count.ctypes.data, 0)
                    state = mask[nM]
                else:
                    # start of a range of Falses - remember where it begins
                    start[0] = boolStart + n
                    startRange = n
                    state = mask[nM]
            lastN = n
            nM += 1
            # state now equals the mask value for this element
            boolArray[n] = state

    if nM > 0 and not mask[nM - 1]:
        # ended on a False - must close the range
        count[0] = (lastN+1) - startRange
        select_hyperslab(spaceid, selectNotb, start.ctypes.data,
            0, count.ctypes.data, 0)

def createSpaceFromRange(start, end, size):
    """
    Creates a H5Space object given the start and end of a range

    * start is the index of the first element of the range.
    * end is one past the last element, so the range covers
      (end - start) elements.
    * size is the size of the dataset the H5Space will be used with.

    Returns a H5Space built from an all-True boolean array of length
    (end - start) that begins at start.
    """
    # the builtin bool is used for the dtype because the numpy.bool alias
    # is not available in every NumPy release
    boolArray = numpy.ones((end - start), dtype=bool)
    space = H5Space(size, boolArray, start)
    return space
class H5Space(object):
    """
    Object that wraps a h5py.h5s.SpaceID object and allows conversion
    quickly from boolean arrays used elsewhere. Also contains methods
    for reading and writing to/from h5py datasets.

    Attributes set by the constructor:

    * space is the h5py.h5s.SpaceID holding the selection
    * boolArray and boolStart are the values passed in, or None when
      the object was built from indices
    * indices is the index array, or None when the object was built
      from boolArray and boolStart
    """
    def __init__(self, size, boolArray=None, boolStart=None, indices=None):
        """
        size is the size of the dataset this object will be used with
        boolArray is a boolean array that is True for elements to be read.
        boolStart is the index into the start of the dataset where
        boolArray begins.
        indices is an array containing the indices that need to be selected.

        Pass either boolArray and boolStart or indices but not all 3.
        If boolArray and boolStart are both given they are used and
        indices is ignored.

        boolArray is kept by reference (not copied) and is modified
        in place by updateBoolArray().

        Raises ValueError if neither boolArray and boolStart nor indices
        are given.
        """
        # create the space object
        self.space = h5py.h5s.create_simple((size,), (size,))
        # default is all selected - reset to none in case boolArray all False
        self.space.select_none()

        if boolArray is not None and boolStart is not None:
            # convert the bool array into it
            start = numpy.empty(1, dtype=numpy.uint64)
            count = numpy.empty(1, dtype=numpy.uint64)
            # convertBoolToHDF5Space reads element 0 so skip empty arrays
            if boolArray.size > 0:
                convertBoolToHDF5Space(boolArray, boolStart, self.space.id,
                        start, count, H5Sselect_hyperslab,
                        h5py.h5s.SELECT_SET, h5py.h5s.SELECT_OR)

            # grab these for updateBoolArray()
            self.boolArray = boolArray
            self.boolStart = boolStart
            self.indices = None

        elif indices is not None:
            if len(indices) > 0:
                # this step is necessary otherwise you get the error:
                # Coordinate array must have shape (<npoints>, 1)
                indices = numpy.expand_dims(indices, axis=1)
                self.space.select_elements(indices)
            # note: stored with the extra axis when it was not empty
            self.indices = indices
            self.boolArray = None
            self.boolStart = None

        else:
            msg = 'Need to specify either boolArray and boolStart or indices'
            raise ValueError(msg)

    def read(self, dataSet):
        """
        Given a h5py dataset read the data ranges selected and
        return a numpy array.

        The array is 1d, has the dtype of dataSet and one element for each
        selected point. It is empty (and nothing is read) when nothing
        is selected.
        """
        # create an empty array of the right size
        npoints = self.space.get_select_npoints()
        data = numpy.empty(npoints, dtype=dataSet.dtype)

        if npoints > 0:
            # create a 'mspace' which is the size of the array
            mspace = h5py.h5s.create_simple(data.shape, data.shape)
            # read it
            dataSet.id.read(mspace, self.space, data)

        return data

    def write(self, dataSet, data):
        """
        Given a h5py dataset and a numpy array write the array
        into the ranges selected.

        Nothing is written when data is empty.
        """
        if data.size > 0:
            # create the 'mspace' which is the size of the array
            mspace = h5py.h5s.create_simple(data.shape, data.shape)
            # write it
            dataSet.id.write(mspace, self.space, data)

    def updateBoolArray(self, mask):
        """
        Update the h5py.h5s.SpaceID object (and cached boolArray or indices)
        with the mask which applies to the current selection.

        mask is a boolean array with one element per currently selected
        element. Its size is not checked against the selection.
        """
        if self.boolStart is not None and self.boolArray is not None:
            # as in the constructor, there is nothing to do for an
            # empty boolArray
            if self.boolArray.size > 0:
                start = numpy.empty(1, dtype=numpy.uint64)
                count = numpy.empty(1, dtype=numpy.uint64)
                updateFromBool(self.space.id, self.boolStart, self.boolArray,
                        mask, start, count, H5Sselect_hyperslab,
                        h5py.h5s.SELECT_NOTB)
        else:
            # indices
            self.indices = self.indices[mask]
            self.space.select_none()
            # as in the constructor, only select when there are indices
            # left - select_none() has already cleared the selection
            if len(self.indices) > 0:
                self.space.select_elements(self.indices)

    def getSelectionSize(self):
        """
        Return the number of elements that are currently selected
        """
        return self.space.get_select_npoints()

    def getSelectedIndices(self):
        """
        Return the selected indices, mainly for use by advanced
        spatial indices.

        Returns self.indices if set, otherwise works it out from
        boolArray and boolStart.

        Note that self.indices is returned exactly as stored, so it has the
        extra axis added by the constructor (shape (npoints, 1)) when it was
        not empty. The array worked out from boolArray is 1d numpy.uint64.
        """
        if self.indices is not None:
            return self.indices

        # every index covered by boolArray, then keep those that are True
        indices = numpy.arange(self.boolStart,
                self.boolStart + len(self.boolArray), dtype=numpy.uint64)
        return indices[self.boolArray]