# coding=UTF-8
"""A collection of raster and vector algorithms and utilities."""
import collections
import functools
import logging
import math
import os
import pprint
import queue
import shutil
import sys
import tempfile
import threading
import time

import numpy
import numpy.ma
import rtree
import scipy.interpolate
import scipy.ndimage
import scipy.signal
import scipy.signal.signaltools
import scipy.sparse
import shapely.ops
import shapely.prepared
import shapely.wkb
from osgeo import gdal
from osgeo import ogr
from osgeo import osr

from . import geoprocessing_core
from .geoprocessing_core import DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS
from .geoprocessing_core import DEFAULT_OSR_AXIS_MAPPING_STRATEGY

# This is used to efficiently pass data to the raster stats worker if available
if sys.version_info >= (3, 8):
    import multiprocessing.shared_memory


class ReclassificationMissingValuesError(Exception):
    """Raised when a raster value is not a valid key to a dictionary.

    Attributes:
        msg (str) - error message
        missing_values (list) - a list of the missing values from the
            raster that are not keys in the dictionary

    """

    def __init__(self, missing_values, raster_path, value_map):
        """See Attributes for args docstring."""
        self.msg = (
            f'The following {missing_values.size} raster values '
            f'{missing_values} from "{raster_path}" do not have '
            f'corresponding entries in the value map: {value_map}.')
        self.missing_values = missing_values
        super().__init__(self.msg)
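
# Illustrative sketch (not part of the original source): catching the
# error above to report unmapped raster values.  This assumes this
# module's ``reclassify_raster`` (defined later in this file), and the
# paths and value map are hypothetical.
def _example_catch_missing_values():
    value_map = {1: 0.5, 2: 0.9}  # hypothetical reclassification table
    try:
        reclassify_raster(
            ('landcover.tif', 1), value_map, 'reclassified.tif',
            gdal.GDT_Float32, -1)
    except ReclassificationMissingValuesError as error:
        # ``missing_values`` lists raster values absent from value_map
        LOGGER.warning('unmapped values: %s', error.missing_values)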
LOGGER = logging.getLogger(__name__)

# Used in joining finished TaskGraph Tasks.
_MAX_TIMEOUT = 60.0

_VALID_GDAL_TYPES = (
    set([getattr(gdal, x) for x in dir(gdal.gdalconst) if 'GDT_' in x]))
_LOGGING_PERIOD = 5.0  # min 5.0 seconds per update log message for the module
_LARGEST_ITERBLOCK = 2**16  # largest block for iterblocks to read in cells

_GDAL_TYPE_TO_NUMPY_LOOKUP = {
    gdal.GDT_Byte: numpy.uint8,
    gdal.GDT_Int16: numpy.int16,
    gdal.GDT_Int32: numpy.int32,
    gdal.GDT_UInt16: numpy.uint16,
    gdal.GDT_UInt32: numpy.uint32,
    gdal.GDT_Float32: numpy.float32,
    gdal.GDT_Float64: numpy.float64,
    gdal.GDT_CFloat32: numpy.csingle,
    gdal.GDT_CFloat64: numpy.complex64,
}
def raster_calculator(
        base_raster_path_band_const_list, local_op, target_raster_path,
        datatype_target, nodata_target,
        calc_raster_stats=True, use_shared_memory=False,
        largest_block=_LARGEST_ITERBLOCK, max_timeout=_MAX_TIMEOUT,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Apply a local raster operation on a stack of rasters.

    This function applies a user defined function across a stack of
    rasters' pixel stack. The rasters in ``base_raster_path_band_list``
    must be spatially aligned and have the same cell sizes.

    Args:
        base_raster_path_band_const_list (sequence): a sequence containing:

            * ``(str, int)`` tuples, referring to a raster path/band
              index pair to use as an input.
            * ``numpy.ndarray`` s of up to two dimensions.  These inputs
              must all be broadcastable to each other AND the size of the
              raster inputs.
            * ``(object, 'raw')`` tuples, where ``object`` will be passed
              directly into the ``local_op``.

            All rasters must have the same raster size. If only arrays
            are input, numpy arrays must be broadcastable to each other
            and the final raster size will be the final broadcast array
            shape. A value error is raised if only "raw" inputs are
            passed.
        local_op (function): a function that must take in as many
            parameters as there are elements in
            ``base_raster_path_band_const_list``. The parameters in
            ``local_op`` will map 1-to-1 in order with the values in
            ``base_raster_path_band_const_list``. ``raster_calculator``
            will call ``local_op`` to generate the pixel values in
            ``target_raster`` along memory block aligned processing
            windows. Note any particular call to ``local_op`` will have
            the arguments from ``raster_path_band_const_list`` sliced to
            overlap that window. If an argument from
            ``raster_path_band_const_list`` is a raster/path band tuple,
            it will be passed to ``local_op`` as a 2D numpy array of
            pixel values that align with the processing window that
            ``local_op`` is targeting. A 2D or 1D array will be sliced to
            match the processing window and in the case of a 1D array
            tiled in whatever dimension is flat. If an argument is a
            scalar it is passed as a scalar.  The return value must be a
            2D array of the same size as any of the input parameter 2D
            arrays and contain the desired pixel values for the target
            raster.
        target_raster_path (string): the path of the output raster.  The
            projection, size, and cell size will be the same as the
            rasters in ``base_raster_path_const_band_list`` or the final
            broadcast size of the constant/ndarray values in the list.
        datatype_target (gdal datatype; int): the desired GDAL output
            type of the target raster.
        nodata_target (numerical value): the desired nodata value of the
            target raster.
        calc_raster_stats (boolean): If True, calculates and sets raster
            statistics (min, max, mean, and stdev) for target raster.
        use_shared_memory (boolean): If True, uses Python Multiprocessing
            shared memory to calculate raster stats for faster
            performance. This feature is available for Python >= 3.8 and
            will otherwise be ignored for earlier versions of Python.
        largest_block (int): Attempts to internally iterate over raster
            blocks with this many elements.  Useful in cases where the
            blocksize is relatively small, memory is available, and the
            function call overhead dominates the iteration.  Defaults to
            ``_LARGEST_ITERBLOCK``. A value of anything less than the
            original blocksize of the raster will result in blocksizes
            equal to the original size.
        max_timeout (float): amount of time in seconds to wait for stats
            worker thread to join. Default is ``_MAX_TIMEOUT``.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        None

    Raises:
        ValueError: invalid input provided

    """
    if not base_raster_path_band_const_list:
        raise ValueError(
            "`base_raster_path_band_const_list` is empty and "
            "should have at least one value.")

    # It's a common error to not pass in path/band tuples, so check for
    # that and report error if so
    bad_raster_path_list = False
    if not isinstance(base_raster_path_band_const_list, (list, tuple)):
        bad_raster_path_list = True
    else:
        for value in base_raster_path_band_const_list:
            if (not _is_raster_path_band_formatted(value) and
                    not isinstance(value, numpy.ndarray) and
                    not (isinstance(value, tuple) and len(value) == 2 and
                         value[1] == 'raw')):
                bad_raster_path_list = True
                break
    if bad_raster_path_list:
        raise ValueError(
            "Expected a sequence of path / integer band tuples, "
            "ndarrays, or (value, 'raw') pairs for "
            "`base_raster_path_band_const_list`, instead got: "
            "%s" % pprint.pformat(base_raster_path_band_const_list))

    # check that any rasters exist on disk and have enough bands
    not_found_paths = []
    gdal.PushErrorHandler('CPLQuietErrorHandler')
    base_raster_path_band_list = [
        path_band for path_band in base_raster_path_band_const_list
        if _is_raster_path_band_formatted(path_band)]
    for value in base_raster_path_band_list:
        if gdal.OpenEx(value[0], gdal.OF_RASTER) is None:
            not_found_paths.append(value[0])
    gdal.PopErrorHandler()
    if not_found_paths:
        raise ValueError(
            "The following files were expected but do not exist on the "
            "filesystem: " + str(not_found_paths))

    # check that band index exists in raster
    invalid_band_index_list = []
    for value in base_raster_path_band_list:
        raster = gdal.OpenEx(value[0], gdal.OF_RASTER)
        if not (1 <= value[1] <= raster.RasterCount):
            invalid_band_index_list.append(value)
        raster = None
    if invalid_band_index_list:
        raise ValueError(
            "The following rasters do not contain requested band "
            "indexes: %s" % invalid_band_index_list)

    # check that the target raster is not also an input raster
    if target_raster_path in [x[0] for x in base_raster_path_band_list]:
        raise ValueError(
            "%s is used as a target path, but it is also in the base "
            "input path list %s" % (
                target_raster_path,
                str(base_raster_path_band_const_list)))

    # check that raster inputs are all the same dimensions
    raster_info_list = [
        get_raster_info(path_band[0])
        for path_band in base_raster_path_band_const_list
        if _is_raster_path_band_formatted(path_band)]
    geospatial_info_set = set()
    for raster_info in raster_info_list:
        geospatial_info_set.add(raster_info['raster_size'])
    if len(geospatial_info_set) > 1:
        raise ValueError(
            "Input Rasters are not the same dimensions. The following "
            "rasters are not identical %s" % str(geospatial_info_set))

    numpy_broadcast_list = [
        x for x in base_raster_path_band_const_list
        if isinstance(x, numpy.ndarray)]
    stats_worker_thread = None
    try:
        # numpy.broadcast can only take up to 32 arguments, this loop
        # works around that restriction:
        while len(numpy_broadcast_list) > 1:
            numpy_broadcast_list = (
                [numpy.broadcast(*numpy_broadcast_list[:32])] +
                numpy_broadcast_list[32:])
        if numpy_broadcast_list:
            numpy_broadcast_size = numpy_broadcast_list[0].shape
    except ValueError:
        # this gets raised if numpy.broadcast fails
        raise ValueError(
            "Numpy array inputs cannot be broadcast into a single shape "
            "%s" % numpy_broadcast_list)

    if numpy_broadcast_list and len(numpy_broadcast_list[0].shape) > 2:
        raise ValueError(
            "Numpy array inputs must be 2 dimensions or less %s" %
            numpy_broadcast_list)

    # if there are both rasters and arrays, check the numpy shape will
    # be broadcastable with raster shape
    if raster_info_list and numpy_broadcast_list:
        # geospatial lists x/y order and numpy does y/x so reverse size
        # list
        raster_shape = tuple(reversed(raster_info_list[0]['raster_size']))
        invalid_broadcast_size = False
        if len(numpy_broadcast_size) == 1:
            # if there's only one dimension it should match the last
            # dimension first, in the raster case this is the columns
            # because of the row/column order of numpy. No problem if
            # that value is ``1`` because it will be broadcast, otherwise
            # it should be the same as the raster.
            if (numpy_broadcast_size[0] != raster_shape[1] and
                    numpy_broadcast_size[0] != 1):
                invalid_broadcast_size = True
        else:
            for dim_index in range(2):
                # no problem if 1 because it'll broadcast, otherwise must
                # be the same value
                if (numpy_broadcast_size[dim_index] !=
                        raster_shape[dim_index] and
                        numpy_broadcast_size[dim_index] != 1):
                    invalid_broadcast_size = True
        if invalid_broadcast_size:
            raise ValueError(
                "Raster size %s cannot be broadcast to numpy shape %s" % (
                    raster_shape, numpy_broadcast_size))

    # create a "canonical" argument list that's bands, 2d numpy arrays,
    # or raw values only
    base_canonical_arg_list = []
    base_raster_list = []
    base_band_list = []
    for value in base_raster_path_band_const_list:
        # the input has been tested and value is either a raster/path
        # band tuple, 1d ndarray, 2d ndarray, or (value, 'raw') tuple.
        if _is_raster_path_band_formatted(value):
            # it's a raster/path band, keep track of open raster and band
            # for later so we can `None` them.
            base_raster_list.append(gdal.OpenEx(value[0], gdal.OF_RASTER))
            base_band_list.append(
                base_raster_list[-1].GetRasterBand(value[1]))
            base_canonical_arg_list.append(base_band_list[-1])
        elif isinstance(value, numpy.ndarray):
            if value.ndim == 1:
                # easier to process as a 2d array for writing to band
                base_canonical_arg_list.append(
                    value.reshape((1, value.shape[0])))
            else:
                # dimensions are two because we checked earlier.
                base_canonical_arg_list.append(value)
        elif isinstance(value, tuple):
            base_canonical_arg_list.append(value)
        else:
            raise ValueError(
                "An unexpected ``value`` occurred. This should never "
                "happen. Value: %r" % value)

    # determine the target raster size
    if raster_info_list:
        # if rasters are passed, the target is the same size as the
        # raster
        n_cols, n_rows = raster_info_list[0]['raster_size']
    elif numpy_broadcast_list:
        # numpy arrays in args and no rasters, so the target size is the
        # broadcast shape expanded to two dimensions if necessary
        if len(numpy_broadcast_size) == 1:
            n_rows, n_cols = 1, numpy_broadcast_size[0]
        else:
            n_rows, n_cols = numpy_broadcast_size
    else:
        raise ValueError(
            "Only (object, 'raw') values have been passed. Raster "
            "calculator requires at least a raster or numpy array as a "
            "parameter. This is the input list: %s" % pprint.pformat(
                base_raster_path_band_const_list))

    if datatype_target not in _VALID_GDAL_TYPES:
        raise ValueError(
            'Invalid target type, should be a gdal.GDT_* type, received '
            '"%s"' % datatype_target)

    # create target raster
    raster_driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])
    try:
        os.makedirs(os.path.dirname(target_raster_path))
    except OSError:
        pass
    target_raster = raster_driver.Create(
        target_raster_path, n_cols, n_rows, 1, datatype_target,
        options=raster_driver_creation_tuple[1])
    target_band = target_raster.GetRasterBand(1)
    if nodata_target is not None:
        target_band.SetNoDataValue(nodata_target)
    if base_raster_list:
        # use the first raster in the list for the projection and
        # geotransform
        target_raster.SetProjection(base_raster_list[0].GetProjection())
        target_raster.SetGeoTransform(
            base_raster_list[0].GetGeoTransform())
    target_band.FlushCache()
    target_raster.FlushCache()

    try:
        last_time = time.time()

        block_offset_list = list(iterblocks(
            (target_raster_path, 1), offset_only=True,
            largest_block=largest_block))

        if calc_raster_stats:
            # this queue is used to send computed valid blocks of the
            # raster to an incremental statistics calculator worker
            stats_worker_queue = queue.Queue()
            exception_queue = queue.Queue()
            if sys.version_info >= (3, 8) and use_shared_memory:
                # The stats worker keeps running variables as a float64,
                # so all input rasters are dtype float64 -- make the
                # shared memory size equivalent.
                block_size_bytes = (
                    numpy.dtype(numpy.float64).itemsize *
                    block_offset_list[0]['win_xsize'] *
                    block_offset_list[0]['win_ysize'])
                shared_memory = multiprocessing.shared_memory.SharedMemory(
                    create=True, size=block_size_bytes)
        else:
            stats_worker_queue = None

        if calc_raster_stats:
            # To avoid doing two passes on the raster to calculate
            # standard deviation, we implement a continuous statistics
            # calculation as the raster is computed. This computational
            # effort is high and benefits from running in parallel. This
            # queue and worker takes a valid block of a raster and
            # incrementally calculates the raster's statistics. When
            # ``None`` is pushed to the queue the worker will finish and
            # return a (min, max, mean, std) tuple.
            LOGGER.info('starting stats_worker')
            stats_worker_thread = threading.Thread(
                target=geoprocessing_core.stats_worker,
                args=(stats_worker_queue, len(block_offset_list)))
            stats_worker_thread.daemon = True
            stats_worker_thread.start()
            LOGGER.info('started stats_worker %s', stats_worker_thread)

        pixels_processed = 0
        n_pixels = n_cols * n_rows

        # iterate over each block and calculate local_op
        for block_offset in block_offset_list:
            # read input blocks
            offset_list = (block_offset['yoff'], block_offset['xoff'])
            blocksize = (
                block_offset['win_ysize'], block_offset['win_xsize'])
            data_blocks = []
            for value in base_canonical_arg_list:
                if isinstance(value, gdal.Band):
                    data_blocks.append(value.ReadAsArray(**block_offset))
                    # I've encountered the following error when a gdal
                    # raster is corrupt, often from multiple threads
                    # writing to the same file. This helps to catch the
                    # error early rather than lead to confusing values of
                    # ``data_blocks`` later.
                    if not isinstance(data_blocks[-1], numpy.ndarray):
                        raise ValueError(
                            f"got a {data_blocks[-1]} when trying to "
                            f"read {value.GetDataset().GetFileList()} at "
                            f"{block_offset}, expected numpy.ndarray.")
                elif isinstance(value, numpy.ndarray):
                    # must be numpy array and all have been conditioned
                    # to be 2d, so start with 0:1 slices and expand if
                    # possible
                    slice_list = [slice(0, 1)] * 2
                    tile_dims = list(blocksize)
                    for dim_index in [0, 1]:
                        if value.shape[dim_index] > 1:
                            slice_list[dim_index] = slice(
                                offset_list[dim_index],
                                offset_list[dim_index] +
                                blocksize[dim_index],)
                            tile_dims[dim_index] = 1
                    data_blocks.append(
                        numpy.tile(value[tuple(slice_list)], tile_dims))
                else:
                    # must be a raw tuple
                    data_blocks.append(value[0])

            target_block = local_op(*data_blocks)

            if (not isinstance(target_block, numpy.ndarray) or
                    target_block.shape != blocksize):
                raise ValueError(
                    "Expected `local_op` to return a numpy.ndarray of "
                    "shape %s but got this instead: %s" % (
                        blocksize, target_block))

            target_band.WriteArray(
                target_block, yoff=block_offset['yoff'],
                xoff=block_offset['xoff'])

            # send result to stats calculator
            if stats_worker_queue:
                # guard against an undefined nodata target
                if nodata_target is not None:
                    target_block = target_block[
                        target_block != nodata_target]
                target_block = target_block.astype(
                    numpy.float64).flatten()
                if sys.version_info >= (3, 8) and use_shared_memory:
                    shared_memory_array = numpy.ndarray(
                        target_block.shape, dtype=target_block.dtype,
                        buffer=shared_memory.buf)
                    shared_memory_array[:] = target_block[:]
                    stats_worker_queue.put((
                        shared_memory_array.shape,
                        shared_memory_array.dtype, shared_memory))
                else:
                    stats_worker_queue.put(target_block)

            pixels_processed += blocksize[0] * blocksize[1]
            last_time = _invoke_timed_callback(
                last_time, lambda: LOGGER.info(
                    '%.1f%% complete',
                    float(pixels_processed) / n_pixels * 100.0),
                _LOGGING_PERIOD)
        LOGGER.info('100.0% complete')

        if calc_raster_stats:
            LOGGER.info("Waiting for raster stats worker result.")
            stats_worker_thread.join(max_timeout)
            if stats_worker_thread.is_alive():
                LOGGER.error("stats_worker_thread.join() timed out")
                raise RuntimeError("stats_worker_thread.join() timed out")
            payload = stats_worker_queue.get(True, max_timeout)
            if payload is not None:
                target_min, target_max, target_mean, target_stddev = (
                    payload)
                target_band.SetStatistics(
                    float(target_min), float(target_max),
                    float(target_mean), float(target_stddev))
                target_band.FlushCache()
    except Exception:
        LOGGER.exception('exception encountered in raster_calculator')
        raise
    finally:
        # This block ensures that rasters are destroyed even if there's
        # an exception raised.
        base_band_list[:] = []
        base_raster_list[:] = []
        target_band.FlushCache()
        target_band = None
        target_raster.FlushCache()
        target_raster = None

        if calc_raster_stats and stats_worker_thread:
            if stats_worker_thread.is_alive():
                stats_worker_queue.put(None, True, max_timeout)
                LOGGER.info("Waiting for raster stats worker result.")
                stats_worker_thread.join(max_timeout)
                if stats_worker_thread.is_alive():
                    LOGGER.error("stats_worker_thread.join() timed out")
                    raise RuntimeError(
                        "stats_worker_thread.join() timed out")

            if sys.version_info >= (3, 8) and use_shared_memory:
                LOGGER.debug(
                    f'unlink shared memory for process {os.getpid()}')
                shared_memory.close()
                shared_memory.unlink()
                LOGGER.debug(
                    f'unlinked shared memory for process {os.getpid()}')

            # check for an exception in the workers, otherwise get result
            # and pass to writer
            try:
                exception = exception_queue.get_nowait()
                LOGGER.error("Exception encountered at termination.")
                raise exception
            except queue.Empty:
                pass
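
# Illustrative usage sketch (not part of the original source): summing
# two aligned rasters with ``raster_calculator`` while respecting a
# hypothetical nodata value of -1.  The input and output paths are
# placeholders.
def _example_raster_calculator():
    nodata = -1

    def _add_op(array_a, array_b):
        # each call receives 2D blocks aligned to the current window
        result = numpy.full(array_a.shape, nodata, dtype=numpy.float32)
        valid = (array_a != nodata) & (array_b != nodata)
        result[valid] = array_a[valid] + array_b[valid]
        return result

    raster_calculator(
        [('raster_a.tif', 1), ('raster_b.tif', 1)], _add_op,
        'sum.tif', gdal.GDT_Float32, nodata)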
def align_and_resize_raster_stack(
        base_raster_path_list, target_raster_path_list,
        resample_method_list, target_pixel_size, bounding_box_mode,
        base_vector_path_list=None, raster_align_index=None,
        base_projection_wkt_list=None, target_projection_wkt=None,
        vector_mask_options=None, gdal_warp_options=None,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS,
        osr_axis_mapping_strategy=DEFAULT_OSR_AXIS_MAPPING_STRATEGY):
    """Generate rasters from a base such that they align geospatially.

    This function resizes base rasters that are in the same geospatial
    projection such that the result is an aligned stack of rasters that
    have the same cell size, dimensions, and bounding box. This is
    achieved by clipping or resizing the rasters to the intersected,
    unioned, or explicitly supplied bounding boxes of all the raster and
    vector input.

    Args:
        base_raster_path_list (sequence): a sequence of base raster paths
            that will be transformed and will be used to determine the
            target bounding box.
        target_raster_path_list (sequence): a sequence of raster paths
            that will be created to one-to-one map with
            ``base_raster_path_list`` as aligned versions of those
            original rasters. If there are duplicate paths in this list,
            the function will raise a ValueError.
        resample_method_list (sequence): a sequence of resampling methods
            which one to one map each path in ``base_raster_path_list``
            during resizing.  Each element must be one of
            "near|bilinear|cubic|cubicspline|lanczos|mode".
        target_pixel_size (list/tuple): the target raster's x and y pixel
            size example: (30, -30).
        bounding_box_mode (string): one of "union", "intersection", or a
            sequence of floats of the form [minx, miny, maxx, maxy] in
            the target projection coordinate system.  Depending on the
            value, output extents are defined as the union, intersection,
            or the explicit bounding box.
        base_vector_path_list (sequence): a sequence of base vector paths
            whose bounding boxes will be used to determine the final
            bounding box of the raster stack if mode is 'union' or
            'intersection'.  If mode is a manual bounding box, these
            vectors are not used in any calculation.
        raster_align_index (int): indicates the index of a raster in
            ``base_raster_path_list`` that the target rasters' bounding
            boxes pixels should align with.  This feature allows rasters
            whose raster dimensions are the same, but bounding boxes
            slightly shifted less than a pixel size to align with a
            desired grid layout.  If ``None`` then the bounding box of
            the target rasters is calculated as the precise intersection,
            union, or bounding box.
        base_projection_wkt_list (sequence): if not None, this is a
            sequence of base projections of the rasters in
            ``base_raster_path_list``. If a value is ``None`` the
            ``base_sr`` is assumed to be whatever is defined in that
            raster. This value is useful if there are rasters with no
            projection defined, but otherwise known.
        target_projection_wkt (string): if not None, this is the desired
            projection of all target rasters in Well Known Text format.
            If None, the base SRS will be passed to the target.
        vector_mask_options (dict): optional, if not None, this is a
            dictionary of options to use an existing vector's geometry to
            mask out pixels in the target raster that do not overlap the
            vector's geometry. Keys to this dictionary are:

            * ``'mask_vector_path'`` (str): path to the mask vector file.
              This vector will be automatically projected to the target
              projection if its base coordinate system does not match
              the target.
            * ``'mask_layer_name'`` (str): the layer name to use for
              masking. If this key is not in the dictionary the default
              is to use the layer at index 0.
            * ``'mask_vector_where_filter'`` (str): an SQL WHERE string.
              This will be used to filter the geometry in the mask. Ex:
              ``'id > 10'`` would use all features whose field value of
              'id' is > 10.

        gdal_warp_options (sequence): if present, the contents of this
            list are passed to the ``warpOptions`` parameter of
            ``gdal.Warp``. See the `GDAL Warp documentation
            <https://gdal.org/api/gdalwarp_cpp.html#_CPPv415GDALWarpOptions>`_
            for valid options.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.
        osr_axis_mapping_strategy (int): OSR axis mapping strategy for
            ``SpatialReference`` objects. Defaults to
            ``geoprocessing.DEFAULT_OSR_AXIS_MAPPING_STRATEGY``. This
            parameter should not be changed unless you know what you are
            doing.

    Return:
        None

    Raises:
        ValueError
            If any combination of the raw bounding boxes, raster bounding
            boxes, vector bounding boxes, and/or vector_mask bounding box
            does not overlap to produce a valid target.
        ValueError
            If any of the input or target lists are of different lengths.
        ValueError
            If there are duplicate paths on the target list which would
            risk corrupted output.
        ValueError
            If some combination of base, target, and embedded source
            reference systems results in an ambiguous target coordinate
            system.
        ValueError
            If ``vector_mask_options`` is not None but the
            ``mask_vector_path`` is undefined or doesn't point to a valid
            file.
        ValueError
            If ``pixel_size`` is not a 2 element sequence of numbers.

    """
    # make sure that the input lists are of the same length
    list_lengths = [
        len(base_raster_path_list), len(target_raster_path_list),
        len(resample_method_list)]
    if len(set(list_lengths)) != 1:
        raise ValueError(
            "base_raster_path_list, target_raster_path_list, and "
            "resample_method_list must be the same length; current "
            "lengths are %s" % (str(list_lengths)))

    unique_targets = set(target_raster_path_list)
    if len(unique_targets) != len(target_raster_path_list):
        seen = set()
        duplicate_list = []
        for path in target_raster_path_list:
            if path not in seen:
                seen.add(path)
            else:
                duplicate_list.append(path)
        raise ValueError(
            "There are duplicated paths on the target list. This is an "
            "invalid state of ``target_path_list``. Duplicates: %s" % (
                duplicate_list))

    # we can accept 'union', 'intersection', or a 4 element list/tuple
    if bounding_box_mode not in ["union", "intersection"] and (
            not isinstance(bounding_box_mode, (list, tuple)) or
            len(bounding_box_mode) != 4):
        raise ValueError(
            "Unknown bounding_box_mode %s" % (str(bounding_box_mode)))

    n_rasters = len(base_raster_path_list)
    if ((raster_align_index is not None) and
            ((raster_align_index < 0) or
             (raster_align_index >= n_rasters))):
        raise ValueError(
            "Alignment index is out of bounds of the datasets index: %s"
            " n_elements %s" % (raster_align_index, n_rasters))

    _assert_is_valid_pixel_size(target_pixel_size)

    # used to get bounding box, projection, and possible alignment info
    raster_info_list = [
        get_raster_info(path) for path in base_raster_path_list]

    # get the literal or intersecting/unioned bounding box
    if isinstance(bounding_box_mode, (list, tuple)):
        # if it's a list or tuple, it must be a manual bounding box
        LOGGER.debug(
            "assuming manual bounding box mode of %s", bounding_box_mode)
        target_bounding_box = bounding_box_mode
    else:
        # either intersection or union, get list of bounding boxes,
        # reproject if necessary, and reduce to a single box
        if base_vector_path_list is not None:
            # vectors are only interesting for their bounding boxes;
            # that's why this construction is inside an else.
            vector_info_list = [
                get_vector_info(path) for path in base_vector_path_list]
        else:
            vector_info_list = []

        raster_bounding_box_list = []
        for raster_index, raster_info in enumerate(raster_info_list):
            # this block calculates the base projection of
            # ``raster_info`` if ``target_projection_wkt`` is defined,
            # thus implying a reprojection will be necessary.
            if target_projection_wkt:
                if base_projection_wkt_list and \
                        base_projection_wkt_list[raster_index]:
                    # a base is defined, use that
                    base_raster_projection_wkt = \
                        base_projection_wkt_list[raster_index]
                else:
                    # otherwise use the raster's projection and there
                    # must be one since we're reprojecting
                    base_raster_projection_wkt = raster_info[
                        'projection_wkt']
                    if not base_raster_projection_wkt:
                        raise ValueError(
                            "no projection for raster %s" %
                            base_raster_path_list[raster_index])
                # since the base spatial reference is potentially
                # different than the target, we need to transform the
                # base bounding box into target coordinates so later we
                # can calculate accurate bounding box overlaps in the
                # target coordinate system
                raster_bounding_box_list.append(
                    transform_bounding_box(
                        raster_info['bounding_box'],
                        base_raster_projection_wkt,
                        target_projection_wkt))
            else:
                raster_bounding_box_list.append(
                    raster_info['bounding_box'])

        # include the vector bounding box information to make a global
        # list of target bounding boxes
        bounding_box_list = [
            vector_info['bounding_box']
            if target_projection_wkt is None else
            transform_bounding_box(
                vector_info['bounding_box'],
                vector_info['projection_wkt'], target_projection_wkt)
            for vector_info in vector_info_list] + raster_bounding_box_list

        target_bounding_box = merge_bounding_box_list(
            bounding_box_list, bounding_box_mode)

    if vector_mask_options:
        # ensure the mask exists and intersects with the target bounding
        # box
        if 'mask_vector_path' not in vector_mask_options:
            raise ValueError(
                'vector_mask_options passed, but no value for '
                '"mask_vector_path": %s', vector_mask_options)

        mask_vector_info = get_vector_info(
            vector_mask_options['mask_vector_path'])

        if 'mask_vector_where_filter' in vector_mask_options:
            # the bounding box only exists for the filtered features
            mask_vector = gdal.OpenEx(
                vector_mask_options['mask_vector_path'], gdal.OF_VECTOR)
            mask_layer = mask_vector.GetLayer()
            mask_layer.SetAttributeFilter(
                vector_mask_options['mask_vector_where_filter'])
            mask_bounding_box = merge_bounding_box_list(
                [[feature.GetGeometryRef().GetEnvelope()[i]
                  for i in [0, 2, 1, 3]] for feature in mask_layer],
                'union')
            mask_layer = None
            mask_vector = None
        else:
            # if no where filter then use the raw vector bounding box
            mask_bounding_box = mask_vector_info['bounding_box']

        mask_vector_projection_wkt = mask_vector_info['projection_wkt']
        if mask_vector_projection_wkt is not None and \
                target_projection_wkt is not None:
            mask_vector_bb = transform_bounding_box(
                mask_bounding_box, mask_vector_info['projection_wkt'],
                target_projection_wkt)
        else:
            mask_vector_bb = mask_vector_info['bounding_box']
        # Calling `merge_bounding_box_list` will raise a ValueError if
        # the bounding box of the mask and the target do not intersect.
        # The result is otherwise not used.
        _ = merge_bounding_box_list(
            [target_bounding_box, mask_vector_bb], 'intersection')

    if raster_align_index is not None and raster_align_index >= 0:
        # bounding box needs alignment
        align_bounding_box = (
            raster_info_list[raster_align_index]['bounding_box'])
        align_pixel_size = (
            raster_info_list[raster_align_index]['pixel_size'])
        # adjust bounding box so lower left corner aligns with a pixel
        # in raster[raster_align_index]
        for index in [0, 1]:
            n_pixels = int(
                (target_bounding_box[index] -
                 align_bounding_box[index]) /
                float(align_pixel_size[index]))
            target_bounding_box[index] = (
                n_pixels * align_pixel_size[index] +
                align_bounding_box[index])

    for index, (base_path, target_path, resample_method) in enumerate(zip(
            base_raster_path_list, target_raster_path_list,
            resample_method_list)):
        warp_raster(
            base_path, target_pixel_size, target_path, resample_method,
            target_bb=target_bounding_box,
            raster_driver_creation_tuple=(raster_driver_creation_tuple),
            target_projection_wkt=target_projection_wkt,
            base_projection_wkt=(
                None if not base_projection_wkt_list else
                base_projection_wkt_list[index]),
            vector_mask_options=vector_mask_options,
            gdal_warp_options=gdal_warp_options)
        LOGGER.info(
            '%d of %d aligned: %s', index+1, n_rasters,
            os.path.basename(target_path))

    LOGGER.info("aligned all %d rasters.", n_rasters)
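
# Illustrative usage sketch (not part of the original source): aligning
# two hypothetical rasters to the intersection of their bounding boxes
# at a 30 meter pixel size, choosing a resampling method per raster.
def _example_align_and_resize():
    align_and_resize_raster_stack(
        ['dem.tif', 'landcover.tif'],
        ['dem_aligned.tif', 'landcover_aligned.tif'],
        ['bilinear', 'near'],  # continuous vs. categorical resampling
        (30, -30), 'intersection')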
def new_raster_from_base(
        base_path, target_path, datatype, band_nodata_list,
        fill_value_list=None, n_rows=None, n_cols=None,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Create new raster by copying spatial reference/geotransform of base.

    A convenience function to simplify the creation of a new raster from
    the basis of an existing one.  Depending on the input mode, one can
    create a new raster of the same dimensions, geotransform, and
    georeference as the base.  Other options are provided to change the
    raster dimensions, number of bands, nodata values, data type, and
    core raster creation options.

    Args:
        base_path (string): path to existing raster.
        target_path (string): path to desired target raster.
        datatype: the pixel datatype of the output raster, for example
            gdal.GDT_Float32.  See the following header file for
            supported pixel types:
            http://www.gdal.org/gdal_8h.html#22e22ce0a55036a96f652765793fb7a4
        band_nodata_list (sequence): list of nodata values, one for each
            band, to set on target raster.  If value is 'None' the nodata
            value is not set for that band.  The number of target bands
            is inferred from the length of this list.
        fill_value_list (sequence): list of values to fill each band
            with. If None, no filling is done.
        n_rows (int): if not None, defines the number of target raster
            rows.
        n_cols (int): if not None, defines the number of target raster
            columns.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        None

    """
    base_raster = gdal.OpenEx(base_path, gdal.OF_RASTER)
    if n_rows is None:
        n_rows = base_raster.RasterYSize
    if n_cols is None:
        n_cols = base_raster.RasterXSize
    driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])

    local_raster_creation_options = list(raster_driver_creation_tuple[1])
    # PIXELTYPE is sometimes used to define signed vs. unsigned bytes and
    # the only place that is stored is in the IMAGE_STRUCTURE metadata;
    # copy it over if it exists and is not already defined by the input
    # creation options. It's okay to get this info from the first band
    # since all bands have the same datatype
    base_band = base_raster.GetRasterBand(1)
    metadata = base_band.GetMetadata('IMAGE_STRUCTURE')
    if 'PIXELTYPE' in metadata and not any(
            ['PIXELTYPE' in option for option in
             local_raster_creation_options]):
        local_raster_creation_options.append(
            'PIXELTYPE=' + metadata['PIXELTYPE'])

    block_size = base_band.GetBlockSize()
    # It's not clear how or if we can determine if the output should be
    # striped or tiled.  Here we leave it up to the default inputs or,
    # if it's obviously not striped, we tile.
    if not any(
            ['TILED' in option for option in
             local_raster_creation_options]):
        # TILED not set, so let's try to set it to a reasonable value
        if block_size[0] != n_cols:
            # if x block is not the width of the raster it *must* be
            # tiled, otherwise okay if it's striped or tiled. I can't
            # construct a test case to cover this, but there is nothing
            # in the spec that restricts this so I have it just in case.
            local_raster_creation_options.append('TILED=YES')

    if not any(
            ['BLOCK' in option for option in
             local_raster_creation_options]):
        # not defined, so let's copy what we know from the current raster
        local_raster_creation_options.extend([
            'BLOCKXSIZE=%d' % block_size[0],
            'BLOCKYSIZE=%d' % block_size[1]])

    # make target directory if it doesn't exist
    try:
        os.makedirs(os.path.dirname(target_path))
    except OSError:
        pass

    base_band = None
    n_bands = len(band_nodata_list)
    target_raster = driver.Create(
        target_path, n_cols, n_rows, n_bands, datatype,
        options=local_raster_creation_options)
    target_raster.SetProjection(base_raster.GetProjection())
    target_raster.SetGeoTransform(base_raster.GetGeoTransform())
    base_raster = None

    for index, nodata_value in enumerate(band_nodata_list):
        if nodata_value is None:
            continue
        target_band = target_raster.GetRasterBand(index + 1)
        try:
            target_band.SetNoDataValue(nodata_value.item())
        except AttributeError:
            target_band.SetNoDataValue(nodata_value)

    target_raster.FlushCache()

    last_time = time.time()
    pixels_processed = 0
    n_pixels = n_cols * n_rows
    numpy_dtype = _GDAL_TYPE_TO_NUMPY_LOOKUP[datatype]

    if fill_value_list is not None:
        for index, fill_value in enumerate(fill_value_list):
            if fill_value is None:
                continue
            target_band = target_raster.GetRasterBand(index + 1)
            # some rasters are very large and a fill can appear to cause
            # computation to hang. This block, though possibly slightly
            # less efficient than ``band.Fill``, will give real-time
            # feedback about how the fill is progressing.
            for offsets in iterblocks((target_path, 1), offset_only=True):
                shape = (offsets['win_ysize'], offsets['win_xsize'])
                fill_array = numpy.full(
                    shape, fill_value, dtype=numpy_dtype)
                pixels_processed += (
                    offsets['win_ysize'] * offsets['win_xsize'])
                target_band.WriteArray(
                    fill_array, offsets['xoff'], offsets['yoff'])
                last_time = _invoke_timed_callback(
                    last_time, lambda: LOGGER.info(
                        f'filling new raster {target_path} with '
                        f'{fill_value} -- '
                        f'{float(pixels_processed)/n_pixels*100.0:.2f}% '
                        f'complete'), _LOGGING_PERIOD)
            target_band = None
    target_band = None
    target_raster = None
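
# Illustrative usage sketch (not part of the original source): creating
# a single-band float32 raster matching a hypothetical base raster,
# with nodata -1 and every pixel initialized to 0.
def _example_new_raster_from_base():
    new_raster_from_base(
        'dem.tif', 'mask.tif', gdal.GDT_Float32, [-1],
        fill_value_list=[0.0])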
def create_raster_from_vector_extents(
        base_vector_path, target_raster_path, target_pixel_size,
        target_pixel_type, target_nodata, fill_value=None,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Create a blank raster based on a vector file extent.

    Args:
        base_vector_path (string): path to vector shapefile to base the
            bounding box for the target raster.
        target_raster_path (string): path to location of generated
            geotiff; the upper left hand corner of this raster will be
            aligned with the bounding box of the source vector and the
            extent will be exactly equal to or contain the source
            vector's bounding box, depending on whether the pixel size
            divides evenly into the source bounding box; if not,
            coordinates will be rounded up to contain the original
            extent.
        target_pixel_size (list/tuple): the x/y pixel size as a sequence

            Example::

                [30.0, -30.0]

        target_pixel_type (int): gdal GDT pixel type of target raster
        target_nodata (numeric): target nodata value. Can be None if no
            nodata value is needed.
        fill_value (int/float): value to fill in the target raster; no
            fill if value is None
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        None

    """
    if target_pixel_type not in _VALID_GDAL_TYPES:
        raise ValueError(
            f'Invalid target type, should be a gdal.GDT_* type, received '
            f'"{target_pixel_type}"')
    # Determine the width and height of the tiff in pixels based on the
    # maximum size of the combined envelope of all the features
    vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
    shp_extent = None
    for layer_index in range(vector.GetLayerCount()):
        layer = vector.GetLayer(layer_index)
        for feature in layer:
            try:
                # envelope is [xmin, xmax, ymin, ymax]
                feature_extent = feature.GetGeometryRef().GetEnvelope()
                if shp_extent is None:
                    shp_extent = list(feature_extent)
                else:
                    # expand bounds of current bounding box to include
                    # that of the newest feature
                    shp_extent = [
                        f(shp_extent[index], feature_extent[index])
                        for index, f in enumerate([min, max, min, max])]
            except AttributeError as error:
                # For some valid OGR objects the geometry can be
                # undefined since it's valid to have a NULL entry in the
                # attribute table. This is expressed as a None value in
                # the geometry reference, and such a feature won't
                # contribute to the extents.
                LOGGER.warning(error)
        layer = None

    if shp_extent is None:
        raise ValueError(
            f'the vector at {base_vector_path} has no geometry, cannot '
            f'create a raster from these extents')

    # round up on the rows and cols so that the target raster encloses
    # the base vector
    n_cols = int(numpy.ceil(
        abs((shp_extent[1] - shp_extent[0]) / target_pixel_size[0])))
    n_cols = max(1, n_cols)
    n_rows = int(numpy.ceil(
        abs((shp_extent[3] - shp_extent[2]) / target_pixel_size[1])))
    n_rows = max(1, n_rows)

    driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])
    n_bands = 1
    raster = driver.Create(
        target_raster_path, n_cols, n_rows, n_bands, target_pixel_type,
        options=raster_driver_creation_tuple[1])
    if target_nodata is not None:
        # the docstring allows a ``None`` nodata, so only set when given
        raster.GetRasterBand(1).SetNoDataValue(target_nodata)

    # Set the transform based on the upper left corner and given pixel
    # dimensions
    if target_pixel_size[0] < 0:
        x_source = shp_extent[1]
    else:
        x_source = shp_extent[0]
    if target_pixel_size[1] < 0:
        y_source = shp_extent[3]
    else:
        y_source = shp_extent[2]
    raster_transform = [
        x_source, target_pixel_size[0], 0.0,
        y_source, 0.0, target_pixel_size[1]]
    raster.SetGeoTransform(raster_transform)

    # Use the same projection on the raster as the shapefile
    raster.SetProjection(
        vector.GetLayer(0).GetSpatialRef().ExportToWkt())

    # Initialize everything to nodata
    if fill_value is not None:
        band = raster.GetRasterBand(1)
        band.Fill(fill_value)
        band = None
    vector = None
    raster = None
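
# Illustrative usage sketch (not part of the original source): creating
# a byte raster covering a hypothetical watershed vector, with nodata
# 255 and pixels initialized to 0.
def _example_create_raster_from_vector_extents():
    create_raster_from_vector_extents(
        'watersheds.gpkg', 'watershed_grid.tif', [30.0, -30.0],
        gdal.GDT_Byte, 255, fill_value=0)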
def interpolate_points(
        base_vector_path, vector_attribute_field,
        target_raster_path_band, interpolation_mode):
    """Interpolate point values onto an existing raster.

    Args:
        base_vector_path (string): path to a shapefile that contains
            point vector layers.
        vector_attribute_field (field): a string in the vector referenced
            at ``base_vector_path`` that refers to a numeric value in the
            vector's attribute table.  This is the value that will be
            interpolated across the raster.
        target_raster_path_band (tuple): a path/band number tuple to an
            existing raster which likely intersects or is nearby the
            source vector.  The band in this raster will take on the
            interpolated numerical values provided at each point.
        interpolation_mode (string): the interpolation method to use for
            scipy.interpolate.griddata, one of 'linear', 'near', or
            'cubic'.

    Return:
        None

    """
    source_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
    point_list = []
    value_list = []
    for layer_index in range(source_vector.GetLayerCount()):
        layer = source_vector.GetLayer(layer_index)
        for point_feature in layer:
            value = point_feature.GetField(vector_attribute_field)
            # Add in the numpy notation which is row, col
            # Here the point geometry is in the form x, y (col, row)
            geometry = point_feature.GetGeometryRef()
            point = geometry.GetPoint()
            point_list.append([point[1], point[0]])
            value_list.append(value)

    point_array = numpy.array(point_list)
    value_array = numpy.array(value_list)

    # getting the offsets first before the raster is opened in update
    # mode
    offset_list = list(
        iterblocks(target_raster_path_band, offset_only=True))
    target_raster = gdal.OpenEx(
        target_raster_path_band[0], gdal.OF_RASTER | gdal.GA_Update)
    band = target_raster.GetRasterBand(target_raster_path_band[1])
    nodata = band.GetNoDataValue()
    geotransform = target_raster.GetGeoTransform()
    for offset in offset_list:
        grid_y, grid_x = numpy.mgrid[
            offset['yoff']:offset['yoff']+offset['win_ysize'],
            offset['xoff']:offset['xoff']+offset['win_xsize']]
        grid_y = grid_y * geotransform[5] + geotransform[3]
        grid_x = grid_x * geotransform[1] + geotransform[0]

        # this is to be consistent with GDAL 2.0's change of 'nearest'
        # to 'near' for an interpolation scheme that SciPy did not
        # change.
        if interpolation_mode == 'near':
            interpolation_mode = 'nearest'
        raster_out_array = scipy.interpolate.griddata(
            point_array, value_array, (grid_y, grid_x),
            interpolation_mode, nodata)
        band.WriteArray(raster_out_array, offset['xoff'], offset['yoff'])
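
# Illustrative usage sketch (not part of the original source):
# interpolating a hypothetical 'depth' field from sample points onto
# band 1 of an existing raster; 'depth_surface.tif' must already exist
# since the function writes into it in place.
def _example_interpolate_points():
    interpolate_points(
        'samples.shp', 'depth', ('depth_surface.tif', 1), 'linear')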
def zonal_statistics(
        base_raster_path_band, aggregate_vector_path,
        aggregate_layer_name=None, ignore_nodata=True,
        polygons_might_overlap=True, working_dir=None):
    """Collect stats on pixel values which lie within polygons.

    This function summarizes raster statistics including min, max, mean,
    and pixel count over the regions on the raster that are overlapped by
    the polygons in the vector layer.  Statistics are calculated in two
    passes, where first polygons aggregate over pixels in the raster
    whose centers intersect with the polygon.  In the second pass, any
    polygons that were not aggregated use their bounding box to intersect
    with the raster for overlap statistics.

    Note:
        There may be some degenerate cases where the bounding box vs.
        actual geometry intersection would be incorrect, but these are so
        unlikely that they would have to be manually constructed.  If you
        encounter one of these please email the description and dataset
        to richsharp@stanford.edu.

    Args:
        base_raster_path_band (tuple): a str/int tuple indicating the
            path to the base raster and the band index of that raster to
            analyze.
        aggregate_vector_path (string): a path to a polygon vector whose
            geometric features indicate the areas in
            ``base_raster_path_band`` to calculate zonal statistics.
        aggregate_layer_name (string): name of shapefile layer that will
            be used to aggregate results over.  If set to None, the first
            layer in the DataSource will be used as retrieved by
            ``.GetLayer()``. Note: it is normal and expected to set this
            field at None if the aggregating shapefile is a single layer
            as many shapefiles, including the common 'ESRI Shapefile',
            are.
        ignore_nodata (boolean): if true, then nodata pixels are not
            accounted for when calculating min, max, count, or mean.
            However, the value of ``nodata_count`` will always be the
            number of nodata pixels aggregated under the polygon.
        polygons_might_overlap (boolean): if True the function calculates
            aggregation coverage close to optimally by rasterizing sets
            of polygons that don't overlap.  However, this step can be
            computationally expensive for cases where there are many
            polygons.  Setting this flag to False directs the function to
            rasterize in one step.
        working_dir (string): If not None, indicates where temporary
            files should be created during this run.

    Return:
        nested dictionary indexed by aggregating feature id, and then by
        one of 'min', 'max', 'sum', 'count', and 'nodata_count'.

        Example::

            {0: {'min': 0,
                 'max': 1,
                 'sum': 1.7,
                 'count': 3,
                 'nodata_count': 1}}

    Raises:
        ValueError
            if ``base_raster_path_band`` is incorrectly formatted.
        RuntimeError
            if the aggregate vector or layer cannot open.

    """
    if not _is_raster_path_band_formatted(base_raster_path_band):
        raise ValueError(
            "`base_raster_path_band` not formatted as expected.  Expects "
            "(path, band_index), received %s" %
            repr(base_raster_path_band))
    aggregate_vector = gdal.OpenEx(aggregate_vector_path, gdal.OF_VECTOR)
    if aggregate_vector is None:
        raise RuntimeError(
            "Could not open aggregate vector at %s" %
            aggregate_vector_path)
    if aggregate_layer_name is not None:
        aggregate_layer = aggregate_vector.GetLayerByName(
            aggregate_layer_name)
    else:
        aggregate_layer = aggregate_vector.GetLayer()
    if aggregate_layer is None:
        raise RuntimeError(
            "Could not open layer %s on %s" % (
                aggregate_layer_name, aggregate_vector_path))

    # create a new aggregate ID field to map base vector aggregate
    # fields to local ones that are guaranteed to be integers.
    local_aggregate_field_name = 'original_fid'
    rasterize_layer_args = {
        'options': [
            'ALL_TOUCHED=FALSE',
            'ATTRIBUTE=%s' % local_aggregate_field_name]
        }

    # clip base raster to aggregating vector intersection
    raster_info = get_raster_info(base_raster_path_band[0])
    # -1 here because bands are 1 indexed
    raster_nodata = raster_info['nodata'][base_raster_path_band[1]-1]
    temp_working_dir = tempfile.mkdtemp(dir=working_dir)
    clipped_raster_path = os.path.join(
        temp_working_dir, 'clipped_raster.tif')

    try:
        align_and_resize_raster_stack(
            [base_raster_path_band[0]], [clipped_raster_path], ['near'],
            raster_info['pixel_size'], 'intersection',
            base_vector_path_list=[aggregate_vector_path],
            raster_align_index=0)
        clipped_raster = gdal.OpenEx(clipped_raster_path, gdal.OF_RASTER)
        clipped_band = clipped_raster.GetRasterBand(
            base_raster_path_band[1])
    except ValueError as e:
        if 'Bounding boxes do not intersect' in repr(e):
            LOGGER.error(
                "aggregate vector %s does not intersect with the raster "
                "%s", aggregate_vector_path, base_raster_path_band)
            aggregate_stats = collections.defaultdict(
                lambda: {
                    'min': None, 'max': None, 'count': 0,
                    'nodata_count': 0, 'sum': 0.0})
            for feature in aggregate_layer:
                _ = aggregate_stats[feature.GetFID()]
            return dict(aggregate_stats)
        else:
            # it would be very unexpected to get here, but if it happened
            # and we didn't raise an exception, execution could get
            # weird.
            raise

    # make a shapefile that non-overlapping layers can be added to
    driver = ogr.GetDriverByName('MEMORY')
    disjoint_vector = driver.CreateDataSource('disjoint_vector')
    spat_ref = aggregate_layer.GetSpatialRef()

    # Initialize these dictionaries to have the shapefile fields in the
    # original datasource even if we don't pick up a value later
    LOGGER.info("build a lookup of aggregate field value to FID")

    aggregate_layer_fid_set = set(
        [agg_feat.GetFID() for agg_feat in aggregate_layer])
    agg_feat = None
    # Loop over each polygon and aggregate
    if polygons_might_overlap:
        LOGGER.info("creating disjoint polygon set")
        disjoint_fid_sets = calculate_disjoint_polygon_set(
            aggregate_vector_path,
            bounding_box=raster_info['bounding_box'])
    else:
        disjoint_fid_sets = [aggregate_layer_fid_set]

    agg_fid_raster_path = os.path.join(temp_working_dir, 'agg_fid.tif')

    agg_fid_nodata = -1
    new_raster_from_base(
        clipped_raster_path, agg_fid_raster_path, gdal.GDT_Int32,
        [agg_fid_nodata])
    # fetch the block offsets before the raster is opened for writing
    agg_fid_offset_list = list(
        iterblocks((agg_fid_raster_path, 1), offset_only=True))
    agg_fid_raster = gdal.OpenEx(
        agg_fid_raster_path, gdal.GA_Update | gdal.OF_RASTER)
    aggregate_stats = collections.defaultdict(lambda: {
        'min': None, 'max': None, 'count': 0, 'nodata_count': 0,
        'sum': 0.0})
    last_time = time.time()
    LOGGER.info(
        "processing %d disjoint polygon sets", len(disjoint_fid_sets))
    for set_index, disjoint_fid_set in enumerate(disjoint_fid_sets):
        last_time = _invoke_timed_callback(
            last_time, lambda: LOGGER.info(
                "zonal stats approximately %.1f%% complete on %s",
                100.0 * float(set_index+1) / len(disjoint_fid_sets),
                os.path.basename(aggregate_vector_path)),
            _LOGGING_PERIOD)
        disjoint_layer = disjoint_vector.CreateLayer(
            'disjoint_vector', spat_ref, ogr.wkbPolygon)
        disjoint_layer.CreateField(
            ogr.FieldDefn(local_aggregate_field_name, ogr.OFTInteger))
        disjoint_layer_defn = disjoint_layer.GetLayerDefn()
        # add polygons to subset_layer
        disjoint_layer.StartTransaction()
        for index, feature_fid in enumerate(disjoint_fid_set):
            last_time = _invoke_timed_callback(
                last_time, lambda: LOGGER.info(
                    "polygon set %d of %d approximately %.1f%% processed "
                    "on %s", set_index+1, len(disjoint_fid_sets),
                    100.0 * float(index+1) / len(disjoint_fid_set),
                    os.path.basename(aggregate_vector_path)),
                _LOGGING_PERIOD)
            agg_feat = aggregate_layer.GetFeature(feature_fid)
            agg_geom_ref = agg_feat.GetGeometryRef()
            disjoint_feat = ogr.Feature(disjoint_layer_defn)
            disjoint_feat.SetGeometry(agg_geom_ref.Clone())
            agg_geom_ref = None
            disjoint_feat.SetField(
                local_aggregate_field_name, feature_fid)
            disjoint_layer.CreateFeature(disjoint_feat)
            agg_feat = None
        disjoint_layer.CommitTransaction()

        LOGGER.info(
            "disjoint polygon set %d of %d 100.0%% processed on %s",
            set_index+1, len(disjoint_fid_sets),
            os.path.basename(aggregate_vector_path))

        # nodata out the mask
        agg_fid_band = agg_fid_raster.GetRasterBand(1)
        agg_fid_band.Fill(agg_fid_nodata)
        LOGGER.info(
            "rasterizing disjoint polygon set %d of %d %s", set_index+1,
            len(disjoint_fid_sets),
            os.path.basename(aggregate_vector_path))
        rasterize_callback = _make_logger_callback(
            "rasterizing polygon " + str(set_index+1) + " of " +
            str(len(disjoint_fid_sets)) + " set %.1f%% complete %s")
        gdal.RasterizeLayer(
            agg_fid_raster, [1], disjoint_layer,
            callback=rasterize_callback, **rasterize_layer_args)
        agg_fid_raster.FlushCache()

        # Delete the features we just added to the subset_layer
        disjoint_layer = None
        disjoint_vector.DeleteLayer(0)

        # create a key array and parallel min, max, count, and nodata
        # count arrays
        LOGGER.info(
            "summarizing rasterized disjoint polygon set %d of %d %s",
            set_index+1, len(disjoint_fid_sets),
            os.path.basename(aggregate_vector_path))
        for agg_fid_offset in agg_fid_offset_list:
            agg_fid_block = agg_fid_band.ReadAsArray(**agg_fid_offset)
            clipped_block = clipped_band.ReadAsArray(**agg_fid_offset)
            valid_mask = (agg_fid_block != agg_fid_nodata)
            valid_agg_fids = agg_fid_block[valid_mask]
            valid_clipped = clipped_block[valid_mask]
            for agg_fid in numpy.unique(valid_agg_fids):
                masked_clipped_block = valid_clipped[
                    valid_agg_fids == agg_fid]
                if raster_nodata is not None:
                    clipped_nodata_mask = numpy.isclose(
                        masked_clipped_block, raster_nodata)
                else:
                    clipped_nodata_mask = numpy.zeros(
                        masked_clipped_block.shape, dtype=bool)
                aggregate_stats[agg_fid]['nodata_count'] += (
                    numpy.count_nonzero(clipped_nodata_mask))
                if ignore_nodata:
                    masked_clipped_block = (
                        masked_clipped_block[~clipped_nodata_mask])
                if masked_clipped_block.size == 0:
                    continue

                if aggregate_stats[agg_fid]['min'] is None:
                    aggregate_stats[agg_fid]['min'] = (
                        masked_clipped_block[0])
                    aggregate_stats[agg_fid]['max'] = (
                        masked_clipped_block[0])

                aggregate_stats[agg_fid]['min'] = min(
                    numpy.min(masked_clipped_block),
                    aggregate_stats[agg_fid]['min'])
                aggregate_stats[agg_fid]['max'] = max(
                    numpy.max(masked_clipped_block),
                    aggregate_stats[agg_fid]['max'])
                aggregate_stats[agg_fid]['count'] += (
                    masked_clipped_block.size)
                aggregate_stats[agg_fid]['sum'] += numpy.sum(
                    masked_clipped_block)

    unset_fids = aggregate_layer_fid_set.difference(aggregate_stats)
    LOGGER.debug(
        "unset_fids: %s of %s ", len(unset_fids),
        len(aggregate_layer_fid_set))
    clipped_gt = numpy.array(
        clipped_raster.GetGeoTransform(), dtype=numpy.float32)
    LOGGER.debug("gt %s for %s", clipped_gt, base_raster_path_band)
    for unset_fid in unset_fids:
        unset_feat = aggregate_layer.GetFeature(unset_fid)
        unset_geom_ref = unset_feat.GetGeometryRef()
        if unset_geom_ref is None:
            LOGGER.warning(
                f'no geometry in {aggregate_vector_path} FID: '
                f'{unset_fid}')
            continue
        unset_geom_envelope = list(unset_geom_ref.GetEnvelope())
        unset_geom_ref = None
        unset_feat = None
        if clipped_gt[1] < 0:
            unset_geom_envelope[0], unset_geom_envelope[1] = (
                unset_geom_envelope[1], unset_geom_envelope[0])
        if clipped_gt[5] < 0:
            unset_geom_envelope[2], unset_geom_envelope[3] = (
                unset_geom_envelope[3], unset_geom_envelope[2])

        xoff = int((unset_geom_envelope[0] - clipped_gt[0]) /
                   clipped_gt[1])
        yoff = int((unset_geom_envelope[2] - clipped_gt[3]) /
                   clipped_gt[5])
        win_xsize = int(numpy.ceil(
            (unset_geom_envelope[1] - clipped_gt[0]) /
            clipped_gt[1])) - xoff
        win_ysize = int(numpy.ceil(
            (unset_geom_envelope[3] - clipped_gt[3]) /
            clipped_gt[5])) - yoff

        # clamp offset to the side of the raster if it's negative
        if xoff < 0:
            win_xsize += xoff
            xoff = 0
        if yoff < 0:
            win_ysize += yoff
            yoff = 0

        # clamp the window to the side of the raster if too big
        if xoff+win_xsize > clipped_band.XSize:
            win_xsize = clipped_band.XSize-xoff
        if yoff+win_ysize > clipped_band.YSize:
            win_ysize = clipped_band.YSize-yoff

        if win_xsize <= 0 or win_ysize <= 0:
            continue

        # here we consider the pixels that intersect with the geometry's
        # bounding box as being the proxy for the intersection with the
        # polygon itself.  This is not a bad approximation since the
        # case that caused the polygon to be skipped in the first phase
        # is that it is as small as a pixel.  There could be some
        # degenerate cases that make this estimation very wrong, but we
        # do not know of any that would come from natural data.  If you
        # do encounter such a dataset please email the description and
        # dataset to richsharp@stanford.edu.
        unset_fid_block = clipped_band.ReadAsArray(
            xoff=xoff, yoff=yoff, win_xsize=win_xsize,
            win_ysize=win_ysize)

        if raster_nodata is not None:
            unset_fid_nodata_mask = numpy.isclose(
                unset_fid_block, raster_nodata)
        else:
            unset_fid_nodata_mask = numpy.zeros(
                unset_fid_block.shape, dtype=bool)

        valid_unset_fid_block = unset_fid_block[~unset_fid_nodata_mask]
        if valid_unset_fid_block.size == 0:
            aggregate_stats[unset_fid]['min'] = 0.0
            aggregate_stats[unset_fid]['max'] = 0.0
            aggregate_stats[unset_fid]['sum'] = 0.0
        else:
            aggregate_stats[unset_fid]['min'] = numpy.min(
                valid_unset_fid_block)
            aggregate_stats[unset_fid]['max'] = numpy.max(
                valid_unset_fid_block)
            aggregate_stats[unset_fid]['sum'] = numpy.sum(
                valid_unset_fid_block)
        aggregate_stats[unset_fid]['count'] = valid_unset_fid_block.size
        aggregate_stats[unset_fid]['nodata_count'] = (
            numpy.count_nonzero(unset_fid_nodata_mask))

    unset_fids = aggregate_layer_fid_set.difference(aggregate_stats)
    LOGGER.debug(
        "remaining unset_fids: %s of %s ", len(unset_fids),
        len(aggregate_layer_fid_set))
    # fill in the missing polygon fids in the aggregate stats by
    # invoking the accessor in the defaultdict
    for fid in unset_fids:
        _ = aggregate_stats[fid]

    LOGGER.info(
        "all done processing polygon sets for %s", os.path.basename(
            aggregate_vector_path))

    # clean up temporary files
    spat_ref = None
    clipped_band = None
    clipped_raster = None
    agg_fid_raster = None
    disjoint_layer = None
    disjoint_vector = None
    aggregate_layer = None
    aggregate_vector = None
    shutil.rmtree(temp_working_dir)
    return dict(aggregate_stats)
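
# Illustrative usage sketch (not part of the original source): computing
# per-polygon means from the statistics dictionary returned above; the
# raster and vector paths are hypothetical.
def _example_zonal_statistics():
    stats = zonal_statistics(('dem.tif', 1), 'watersheds.gpkg')
    for fid, feature_stats in stats.items():
        if feature_stats['count'] > 0:
            mean = feature_stats['sum'] / feature_stats['count']
            LOGGER.info('FID %s mean: %s', fid, mean)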
def get_vector_info(vector_path, layer_id=0):
    """Get information about a GDAL vector.

    Args:
        vector_path (str): a path to a GDAL vector.
        layer_id (str/int): name or index of underlying layer to analyze.
            Defaults to 0.

    Raises:
        ValueError if ``vector_path`` does not exist on disk or cannot be
        opened as a gdal.OF_VECTOR.

    Return:
        raster_properties (dictionary): a dictionary with the following
            key-value pairs:

            * ``'projection_wkt'`` (string): projection of the vector in
              Well Known Text.
            * ``'bounding_box'`` (sequence): sequence of floats
              representing the bounding box in projected coordinates in
              the order [minx, miny, maxx, maxy].
            * ``'file_list'`` (sequence): sequence of string paths to the
              files that make up this vector.

    """
    vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    if not vector:
        raise ValueError(
            "Could not open %s as a gdal.OF_VECTOR" % vector_path)
    vector_properties = {}
    vector_properties['file_list'] = vector.GetFileList()
    layer = vector.GetLayer(iLayer=layer_id)
    # projection is same for all layers, so just use the first one
    spatial_ref = layer.GetSpatialRef()
    if spatial_ref:
        vector_projection_wkt = spatial_ref.ExportToWkt()
    else:
        vector_projection_wkt = None
    vector_properties['projection_wkt'] = vector_projection_wkt
    layer_bb = layer.GetExtent()
    layer = None
    vector = None
    # convert from [minx,maxx,miny,maxy] to [minx,miny,maxx,maxy]
    vector_properties['bounding_box'] = [
        layer_bb[i] for i in [0, 2, 1, 3]]
    return vector_properties
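
# Illustrative usage sketch (not part of the original source): reading
# a hypothetical vector's bounding box from its info dictionary.
def _example_get_vector_info():
    info = get_vector_info('watersheds.gpkg')
    minx, miny, maxx, maxy = info['bounding_box']
    LOGGER.info('vector extent: %s', (minx, miny, maxx, maxy))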
[docs]def get_raster_info(raster_path):
    """Get information about a GDAL raster (dataset).

    Args:
        raster_path (String): a path to a GDAL raster.

    Raises:
        ValueError if ``raster_path`` is not a file or cannot be opened as a
        ``gdal.OF_RASTER``.

    Return:
        raster_properties (dictionary): a dictionary with the properties
            stored under relevant keys.

            * ``'pixel_size'`` (tuple): (pixel x-size, pixel y-size)
              from geotransform.
            * ``'raster_size'`` (tuple): number of raster pixels in (x, y)
              direction.
            * ``'nodata'`` (sequence): a sequence of the nodata values in the
              bands of the raster in the same order as increasing band index.
            * ``'n_bands'`` (int): number of bands in the raster.
            * ``'geotransform'`` (tuple): a 6-tuple representing the
              geotransform of (x origin, x-increase, xy-increase, y origin,
              yx-increase, y-increase).
            * ``'datatype'`` (int): An instance of an enumerated gdal.GDT_*
              int that represents the datatype of the raster.
            * ``'projection_wkt'`` (string): projection of the raster in Well
              Known Text.
            * ``'bounding_box'`` (sequence): sequence of floats representing
              the bounding box in projected coordinates in the order
              [minx, miny, maxx, maxy]
            * ``'block_size'`` (tuple): underlying x/y raster block size for
              efficient reading.
            * ``'numpy_type'`` (numpy type): this is the equivalent numpy
              datatype for the raster bands including signed bytes.
            * ``'file_list'`` (sequence): list of the files that make up
              this raster.

    """
    raster = gdal.OpenEx(raster_path, gdal.OF_RASTER)
    if not raster:
        raise ValueError(
            "Could not open %s as a gdal.OF_RASTER" % raster_path)
    raster_properties = {}
    raster_properties['file_list'] = raster.GetFileList()
    projection_wkt = raster.GetProjection()
    if not projection_wkt:
        projection_wkt = None
    raster_properties['projection_wkt'] = projection_wkt
    geo_transform = raster.GetGeoTransform()
    raster_properties['geotransform'] = geo_transform
    raster_properties['pixel_size'] = (geo_transform[1], geo_transform[5])
    raster_properties['raster_size'] = (
        raster.GetRasterBand(1).XSize,
        raster.GetRasterBand(1).YSize)
    raster_properties['n_bands'] = raster.RasterCount
    raster_properties['nodata'] = [
        raster.GetRasterBand(index).GetNoDataValue() for index in range(
            1, raster_properties['n_bands']+1)]
    # blocksize is the same for all bands, so we can just get the first
    raster_properties['block_size'] = raster.GetRasterBand(1).GetBlockSize()

    # we don't really know how the geotransform is laid out, all we can do is
    # calculate the x and y bounds, then take the appropriate min/max
    x_bounds = [
        geo_transform[0],
        geo_transform[0] +
        raster_properties['raster_size'][0] * geo_transform[1] +
        raster_properties['raster_size'][1] * geo_transform[2]]
    y_bounds = [
        geo_transform[3],
        geo_transform[3] +
        raster_properties['raster_size'][0] * geo_transform[4] +
        raster_properties['raster_size'][1] * geo_transform[5]]

    raster_properties['bounding_box'] = [
        numpy.min(x_bounds), numpy.min(y_bounds),
        numpy.max(x_bounds), numpy.max(y_bounds)]

    # datatype is the same for the whole raster, but is associated with band
    band = raster.GetRasterBand(1)
    band_datatype = band.DataType
    raster_properties['datatype'] = band_datatype
    raster_properties['numpy_type'] = (
        _GDAL_TYPE_TO_NUMPY_LOOKUP[band_datatype])
    # this part checks to see if the byte is signed or not
    if band_datatype == gdal.GDT_Byte:
        metadata = band.GetMetadata('IMAGE_STRUCTURE')
        if 'PIXELTYPE' in metadata and metadata['PIXELTYPE'] == 'SIGNEDBYTE':
            raster_properties['numpy_type'] = numpy.int8
    band = None
    raster = None
    return raster_properties
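# Illustrative usage sketch (not part of the original module): shows the keys
# most commonly consumed from get_raster_info. The raster path is
# hypothetical.
def _example_raster_info():
    """Sketch: derive the pixel area of a hypothetical projected raster."""
    info = get_raster_info('dem.tif')  # hypothetical path
    pixel_x, pixel_y = info['pixel_size']
    # pixel_y is negative for north-up rasters, so take absolute values
    pixel_area = abs(pixel_x) * abs(pixel_y)
    LOGGER.info(
        "%d band(s), %s pixels, pixel area %f, nodata %s",
        info['n_bands'], info['raster_size'], pixel_area, info['nodata'])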
[docs]def reproject_vector(
        base_vector_path, target_projection_wkt, target_path, layer_id=0,
        driver_name='ESRI Shapefile', copy_fields=True,
        osr_axis_mapping_strategy=DEFAULT_OSR_AXIS_MAPPING_STRATEGY):
    """Reproject OGR DataSource (vector).

    Transforms the features of the base vector to the desired output
    projection in a new vector file (an ESRI Shapefile by default).

    Args:
        base_vector_path (string): Path to the base vector to transform.
        target_projection_wkt (string): the desired output projection in Well
            Known Text (as from layer.GetSpatialRef().ExportToWkt())
        target_path (string): the filepath to the transformed vector
        layer_id (str/int): name or index of layer in ``base_vector_path``
            to reproject. Defaults to 0.
        driver_name (string): String to pass to ogr.GetDriverByName, defaults
            to 'ESRI Shapefile'.
        copy_fields (bool or iterable): If True, all the fields in
            ``base_vector_path`` will be copied to ``target_path`` during the
            reprojection step. If it is an iterable, it will contain the
            field names to exclusively copy. An unmatched fieldname will be
            ignored. If ``False`` no fields are copied into the new vector.
        osr_axis_mapping_strategy (int): OSR axis mapping strategy for
            ``SpatialReference`` objects. Defaults to
            ``geoprocessing.DEFAULT_OSR_AXIS_MAPPING_STRATEGY``. This
            parameter should not be changed unless you know what you are
            doing.

    Return:
        None

    """
    base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)

    # if this file already exists, then remove it
    if os.path.isfile(target_path):
        LOGGER.warning(
            "%s already exists, removing and overwriting", target_path)
        os.remove(target_path)

    target_sr = osr.SpatialReference(target_projection_wkt)

    # create a new vector from the original datasource
    target_driver = ogr.GetDriverByName(driver_name)
    target_vector = target_driver.CreateDataSource(target_path)

    layer = base_vector.GetLayer(layer_id)
    layer_dfn = layer.GetLayerDefn()

    # Create new layer for target_vector using same name and
    # geometry type from base vector but new projection
    target_layer = target_vector.CreateLayer(
        layer_dfn.GetName(), target_sr, layer_dfn.GetGeomType())

    # this maps each copied base field index to the target field index it
    # was copied to, in case we don't need to copy all the fields
    base_to_target_field_id_map = {}
    if copy_fields:
        # Get the number of fields in original_layer
        original_field_count = layer_dfn.GetFieldCount()

        # For every field to copy, create a duplicate field in the new layer
        for fld_index in range(original_field_count):
            original_field = layer_dfn.GetFieldDefn(fld_index)
            field_name = original_field.GetName()
            if copy_fields is True or field_name in copy_fields:
                target_field = ogr.FieldDefn(
                    field_name, original_field.GetType())
                target_layer.CreateField(target_field)
                base_to_target_field_id_map[fld_index] = len(
                    base_to_target_field_id_map)

    # Get the SR of the original_layer to use in transforming
    base_sr = layer.GetSpatialRef()
    base_sr.SetAxisMappingStrategy(osr_axis_mapping_strategy)
    target_sr.SetAxisMappingStrategy(osr_axis_mapping_strategy)

    # Create a coordinate transformation
    coord_trans = osr.CreateCoordinateTransformation(base_sr, target_sr)

    # Copy all of the features in layer to the new vector
    target_layer.StartTransaction()
    error_count = 0
    last_time = time.time()
    LOGGER.info("starting reprojection")
    for feature_index, base_feature in enumerate(layer):
        last_time = _invoke_timed_callback(
            last_time, lambda: LOGGER.info(
                "reprojection approximately %.1f%% complete on %s",
                100.0 * float(feature_index+1) / (layer.GetFeatureCount()),
                os.path.basename(target_path)),
            _LOGGING_PERIOD)

        geom = base_feature.GetGeometryRef()
        if geom is None:
            # we encountered this error occasionally when transforming
            # clipped global polygons. Not clear what is happening, but
            # perhaps a feature was retained that otherwise wouldn't have
            # been included in the clip
            error_count += 1
            continue

        # Transform geometry into format desired for the new projection
        error_code = geom.Transform(coord_trans)
        if error_code != 0:  # error
            # this could be caused by an out of range transformation;
            # whatever the case, don't put the transformed poly into the
            # output set
            error_count += 1
            continue

        # Copy original_datasource's feature and set as new shapes feature
        target_feature = ogr.Feature(target_layer.GetLayerDefn())
        target_feature.SetGeometry(geom)

        # For all the fields in the feature set the field values from the
        # source field
        for base_index, target_index in (
                base_to_target_field_id_map.items()):
            target_feature.SetField(
                target_index, base_feature.GetField(base_index))

        target_layer.CreateFeature(target_feature)
        target_feature = None
        base_feature = None
    target_layer.CommitTransaction()
    LOGGER.info(
        "reprojection 100.0%% complete on %s",
        os.path.basename(target_path))
    if error_count > 0:
        LOGGER.warning(
            '%d features out of %d were unable to be transformed and are'
            ' not in the output vector at %s', error_count,
            layer.GetFeatureCount(), target_path)
    layer = None
    base_vector = None
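# Illustrative usage sketch (not part of the original module): reprojects a
# hypothetical vector to Web Mercator (EPSG:3857), copying only one field.
# Both file paths and the field name are invented for demonstration.
def _example_reproject_vector():
    """Sketch: reproject a vector and copy a single field."""
    target_srs = osr.SpatialReference()
    target_srs.ImportFromEPSG(3857)
    reproject_vector(
        'parcels.shp', target_srs.ExportToWkt(), 'parcels_3857.shp',
        copy_fields=['parcel_id'])  # only this hypothetical field is copied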
[docs]def reclassify_raster(
        base_raster_path_band, value_map, target_raster_path,
        target_datatype, target_nodata, values_required=True,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Reclassify pixel values in a raster.

    A function to reclassify values in raster to any output type. By default
    the values except for nodata must be in ``value_map``. Include the base
    raster nodata value in ``value_map`` to reclassify nodata pixels to a
    valid value.

    Args:
        base_raster_path_band (tuple): a tuple including file path to a
            raster and the band index to operate over. ex: (path, band_index)
        value_map (dictionary): a dictionary of values of
            {source_value: dest_value, ...} where source_value's type is the
            same as the values in ``base_raster_path`` at band
            ``band_index``. Cannot be empty and must contain a mapping for
            all raster values if ``values_required=True``. If nodata is
            mapped, nodata will be reclassified, otherwise nodata will be
            set to ``target_nodata``.
        target_raster_path (string): target raster output path; overwritten
            if it exists
        target_datatype (gdal type): the numerical type for the target raster
        target_nodata (numerical type): the nodata value for the target
            raster. Must be the same type as target_datatype. All nodata
            pixels in the base raster will be reclassified to this value,
            unless the base raster nodata values are present in
            ``value_map``.
        values_required (bool): If True, raise a ValueError if there is a
            value in the raster that is not found in ``value_map``.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        None

    Raises:
        ReclassificationMissingValuesError if ``values_required`` is ``True``
        and a pixel value from ``base_raster_path_band`` is not a key in
        ``value_map``.

    """
    if len(value_map) == 0:
        raise ValueError("value_map must contain at least one value")
    if not _is_raster_path_band_formatted(base_raster_path_band):
        raise ValueError(
            "Expected a (path, band_id) tuple, instead got '%s'" %
            base_raster_path_band)
    raster_info = get_raster_info(base_raster_path_band[0])
    nodata = raster_info['nodata'][base_raster_path_band[1]-1]

    # If nodata was included in the value_map, pop it from our lists and
    # handle it separately. Doing this on the off chance nodata is a max or
    # min float which can cause floating point discrepancies.
    value_map_copy = value_map.copy()
    nodata_dest_value = target_nodata
    if nodata is not None:
        for key, val in value_map.items():
            if numpy.isclose(key, nodata):
                nodata_dest_value = val
                del value_map_copy[key]
                break

    if target_nodata is None and nodata_dest_value is None:
        raise ValueError(
            "target_nodata was set to None and the base raster nodata"
            " value was not represented in the value_map. reclassify_raster"
            " does not assume the base raster nodata value should be used"
            " as the target_nodata value. Set target_nodata to a valid"
            " number and/or add a base raster nodata value mapping to"
            " value_map.")

    keys = sorted(numpy.array(list(value_map_copy.keys())))
    values = numpy.array([value_map_copy[x] for x in keys])

    def _map_dataset_to_value_op(original_values):
        """Convert a block of original values to the lookup values."""
        out_array = numpy.full(
            original_values.shape, target_nodata,
            dtype=_GDAL_TYPE_TO_NUMPY_LOOKUP[target_datatype])
        if nodata is None:
            valid_mask = numpy.full(original_values.shape, True)
        else:
            valid_mask = ~numpy.isclose(original_values, nodata)
        out_array[~valid_mask] = nodata_dest_value
        if values_required:
            unique = numpy.unique(original_values[valid_mask])
            has_map = numpy.isin(unique, keys)
            if not all(has_map):
                missing_values = unique[~has_map]
                raise ReclassificationMissingValuesError(
                    missing_values, base_raster_path_band[0], value_map)
        index = numpy.digitize(original_values[valid_mask], keys, right=True)
        out_array[valid_mask] = values[index]
        return out_array

    raster_calculator(
        [base_raster_path_band], _map_dataset_to_value_op,
        target_raster_path, target_datatype, target_nodata,
        raster_driver_creation_tuple=raster_driver_creation_tuple)
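# Illustrative usage sketch (not part of the original module): maps
# hypothetical land-cover codes to habitat scores. The paths and value map
# are invented for demonstration.
def _example_reclassify():
    """Sketch: reclassify integer land-cover codes to float scores."""
    value_map = {1: 0.0, 2: 0.5, 3: 1.0}  # hypothetical code -> score
    reclassify_raster(
        ('landcover.tif', 1), value_map, 'habitat_score.tif',
        gdal.GDT_Float32, -1.0, values_required=True)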
[docs]def warp_raster(
        base_raster_path, target_pixel_size, target_raster_path,
        resample_method, target_bb=None, base_projection_wkt=None,
        target_projection_wkt=None, n_threads=None, vector_mask_options=None,
        gdal_warp_options=None, working_dir=None,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS,
        osr_axis_mapping_strategy=DEFAULT_OSR_AXIS_MAPPING_STRATEGY):
    """Resize/resample raster to desired pixel size, bbox and projection.

    Args:
        base_raster_path (string): path to base raster.
        target_pixel_size (list/tuple): a two element sequence indicating
            the x and y pixel size in projected units.
        target_raster_path (string): the location of the resized and
            resampled raster.
        resample_method (string): the resampling technique, one of
            ``near|bilinear|cubic|cubicspline|lanczos|average|mode|max|min|med|q1|q3``
        target_bb (sequence): if None, target bounding box is the same as the
            source bounding box. Otherwise it's a sequence of floats
            describing the target bounding box in the target coordinate
            system as [minx, miny, maxx, maxy].
        base_projection_wkt (string): if not None, interpret the projection
            of ``base_raster_path`` as this.
        target_projection_wkt (string): if not None, desired target
            projection in Well Known Text format.
        n_threads (int): optional, if not None this sets the ``N_THREADS``
            option for ``gdal.Warp``.
        vector_mask_options (dict): optional, if not None, this is a
            dictionary of options to use an existing vector's geometry to
            mask out pixels in the target raster that do not overlap the
            vector's geometry. Keys to this dictionary are:

            * ``'mask_vector_path'``: (str) path to the mask vector file.
              This vector will be automatically projected to the target
              projection if its base coordinate system does not match the
              target.
            * ``'mask_layer_id'``: (int/str) the layer index or name to use
              for masking, if this key is not in the dictionary the default
              is to use the layer at index 0.
            * ``'mask_vector_where_filter'``: (str) an SQL WHERE string that
              can be used to filter the geometry in the mask. Ex:
              'id > 10' would use all features whose field value of 'id' is
              > 10.

        gdal_warp_options (sequence): if present, the contents of this list
            are passed to the ``warpOptions`` parameter of ``gdal.Warp``. See
            the GDAL Warp documentation for valid options.
        working_dir (string): if defined uses this directory to make
            temporary working files for calculation. Otherwise uses system's
            temp directory.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.
        osr_axis_mapping_strategy (int): OSR axis mapping strategy for
            ``SpatialReference`` objects. Defaults to
            ``geoprocessing.DEFAULT_OSR_AXIS_MAPPING_STRATEGY``. This
            parameter should not be changed unless you know what you are
            doing.

    Return:
        None

    Raises:
        ValueError if ``target_pixel_size`` is not a 2 element sequence of
        numbers.
        ValueError if ``vector_mask_options`` is not None but the
        ``mask_vector_path`` is undefined or doesn't point to a valid file.
""" _assert_is_valid_pixel_size(target_pixel_size) base_raster_info = get_raster_info(base_raster_path) if target_projection_wkt is None: target_projection_wkt = base_raster_info['projection_wkt'] if target_bb is None: # ensure it's a sequence so we can modify it working_bb = list(get_raster_info(base_raster_path)['bounding_box']) # transform the working_bb if target_projection_wkt is not None if target_projection_wkt is not None: LOGGER.debug( "transforming bounding box from %s ", working_bb) working_bb = transform_bounding_box( base_raster_info['bounding_box'], base_raster_info['projection_wkt'], target_projection_wkt) LOGGER.debug( "transforming bounding to %s ", working_bb) else: # ensure it's a sequence so we can modify it working_bb = list(target_bb) # determine the raster size that bounds the input bounding box and then # adjust the bounding box to be that size target_x_size = int(abs( float(working_bb[2] - working_bb[0]) / target_pixel_size[0])) target_y_size = int(abs( float(working_bb[3] - working_bb[1]) / target_pixel_size[1])) # sometimes bounding boxes are numerically perfect, this checks for that x_residual = ( abs(target_x_size * target_pixel_size[0]) - (working_bb[2] - working_bb[0])) if not numpy.isclose(x_residual, 0.0): target_x_size += 1 y_residual = ( abs(target_y_size * target_pixel_size[1]) - (working_bb[3] - working_bb[1])) if not numpy.isclose(y_residual, 0.0): target_y_size += 1 if target_x_size == 0: LOGGER.warning( "bounding_box is so small that x dimension rounds to 0; " "clamping to 1.") target_x_size = 1 if target_y_size == 0: LOGGER.warning( "bounding_box is so small that y dimension rounds to 0; " "clamping to 1.") target_y_size = 1 # this ensures the bounding boxes perfectly fit a multiple of the target # pixel size working_bb[2] = working_bb[0] + abs(target_pixel_size[0] * target_x_size) working_bb[3] = working_bb[1] + abs(target_pixel_size[1] * target_y_size) reproject_callback = _make_logger_callback( "Warp %.1f%% complete %s") warp_options = [] if n_threads: warp_options.append('NUM_THREADS=%d' % n_threads) if gdal_warp_options: warp_options.extend(gdal_warp_options) mask_vector_path = None mask_layer_id = 0 mask_vector_where_filter = None if vector_mask_options: # translate pygeoprocessing terminology into GDAL warp options. 
        if 'mask_vector_path' not in vector_mask_options:
            raise ValueError(
                'vector_mask_options passed, but no value for '
                '"mask_vector_path": %s' % vector_mask_options)
        mask_vector_path = vector_mask_options['mask_vector_path']
        if not os.path.exists(mask_vector_path):
            raise ValueError(
                'The mask vector at %s was not found.' % mask_vector_path)
        if 'mask_layer_id' in vector_mask_options:
            mask_layer_id = vector_mask_options['mask_layer_id']
        if 'mask_vector_where_filter' in vector_mask_options:
            mask_vector_where_filter = (
                vector_mask_options['mask_vector_where_filter'])

    if vector_mask_options:
        temp_working_dir = tempfile.mkdtemp(dir=working_dir)
        warped_raster_path = os.path.join(
            temp_working_dir, os.path.basename(target_raster_path).replace(
                '.tif', '_nonmasked.tif'))
    else:
        # if there is no vector path the result is the warp
        warped_raster_path = target_raster_path

    base_raster = gdal.OpenEx(base_raster_path, gdal.OF_RASTER)

    raster_creation_options = list(raster_driver_creation_tuple[1])
    if (base_raster_info['numpy_type'] == numpy.int8 and
            'PIXELTYPE' not in ' '.join(raster_creation_options)):
        raster_creation_options.append('PIXELTYPE=SIGNEDBYTE')

    # WarpOptions.this is None when an invalid option is passed, and it's a
    # truthy SWIG proxy object when it's given a valid resample arg.
    if not gdal.WarpOptions(resampleAlg=resample_method)[0].this:
        raise ValueError(
            f'Invalid resample method: "{resample_method}"')

    gdal.Warp(
        warped_raster_path, base_raster,
        format=raster_driver_creation_tuple[0],
        outputBounds=working_bb,
        xRes=abs(target_pixel_size[0]),
        yRes=abs(target_pixel_size[1]),
        resampleAlg=resample_method,
        outputBoundsSRS=target_projection_wkt,
        srcSRS=base_projection_wkt,
        dstSRS=target_projection_wkt,
        multithread=True if warp_options else False,
        warpOptions=warp_options,
        creationOptions=raster_creation_options,
        callback=reproject_callback,
        callback_data=[target_raster_path])

    if vector_mask_options:
        # Make sure the raster creation options passed to ``mask_raster``
        # reflect any metadata updates
        updated_raster_driver_creation_tuple = (
            raster_driver_creation_tuple[0], tuple(raster_creation_options))
        # there was a cutline vector, so mask it out now, otherwise target
        # is already the result.
        mask_raster(
            (warped_raster_path, 1), vector_mask_options['mask_vector_path'],
            target_raster_path, mask_layer_id=mask_layer_id,
            where_clause=mask_vector_where_filter,
            target_mask_value=None, working_dir=temp_working_dir,
            all_touched=False,
            raster_driver_creation_tuple=(
                updated_raster_driver_creation_tuple))
        shutil.rmtree(temp_working_dir)
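# Illustrative usage sketch (not part of the original module): resamples a
# hypothetical raster to 30 m pixels in its own projection and masks it by a
# hypothetical watershed vector, using the vector_mask_options keys
# documented above. All paths and the filter field are invented.
def _example_warp():
    """Sketch: resample to 30 m and clip to a vector mask."""
    warp_raster(
        'dem.tif', (30.0, -30.0), 'dem_30m_masked.tif', 'bilinear',
        vector_mask_options={
            'mask_vector_path': 'watershed.gpkg',  # hypothetical
            'mask_vector_where_filter': "ws_id = 1",  # hypothetical field
        })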
[docs]def rasterize(
        vector_path, target_raster_path, burn_values=None, option_list=None,
        layer_id=0, where_clause=None):
    """Project a vector onto an existing raster.

    Burn the layer at ``layer_id`` in ``vector_path`` to an existing
    raster at ``target_raster_path``.

    Args:
        vector_path (string): filepath to vector to rasterize.
        target_raster_path (string): path to an existing raster to burn
            vector into. Can have multiple bands.
        burn_values (list/tuple): optional sequence of values to burn into
            each band of the raster. If used, should have the same length as
            number of bands at the ``target_raster_path`` raster. If ``None``
            then ``option_list`` must have a valid value.
        option_list (list/tuple): optional, a sequence of burn options; if
            None then a valid value for ``burn_values`` must exist.
            Otherwise, each element is a string of the form:

            * ``"ATTRIBUTE=?"``: Identifies an attribute field on the
              features to be used for a burn in value. The value will be
              burned into all output bands. If specified, ``burn_values``
              will not be used and can be None.
            * ``"CHUNKYSIZE=?"``: The height in lines of the chunk to operate
              on. The larger the chunk size the less times we need to make a
              pass through all the shapes. If it is not set or set to zero
              the default chunk size will be used. Default size will be
              estimated based on the GDAL cache buffer size using formula:
              ``cache_size_bytes/scanline_size_bytes``, so the chunk will
              not exceed the cache.
            * ``"ALL_TOUCHED=TRUE/FALSE"``: May be set to ``TRUE`` to set all
              pixels touched by the line or polygons, not just those whose
              center is within the polygon or that are selected by
              Bresenham's line algorithm. Defaults to ``FALSE``.
            * ``"BURN_VALUE_FROM"``: May be set to "Z" to use the Z values of
              the geometries. The value from burn_values or the attribute
              field value is added to this before burning. In the default
              case dfBurnValue is burned as-is (note: this wording is copied
              from the formal GDAL docs). This is implemented properly only
              for points and lines for now. Polygons will be burned using
              the Z value from the first point.
            * ``"MERGE_ALG=REPLACE/ADD"``: REPLACE results in overwriting of
              value, while ADD adds the new value to the existing raster,
              suitable for heatmaps for instance.

            Example::

                ["ATTRIBUTE=npv", "ALL_TOUCHED=TRUE"]

        layer_id (str/int): name or index of the layer to rasterize. Defaults
            to 0.
        where_clause (str): If not None, is an SQL query-like string to
            filter which features are used to rasterize,
            (e.g. where="value=1").

    Return:
        None

    """
    gdal.PushErrorHandler('CPLQuietErrorHandler')
    raster = gdal.OpenEx(target_raster_path, gdal.GA_Update | gdal.OF_RASTER)
    gdal.PopErrorHandler()

    if raster is None:
        raise ValueError(
            "%s doesn't exist, but needed to rasterize." % target_raster_path)

    rasterize_callback = _make_logger_callback(
        "RasterizeLayer %.1f%% complete %s")

    if burn_values is None:
        burn_values = []
    if option_list is None:
        option_list = []

    if not burn_values and not option_list:
        raise ValueError(
            "Neither `burn_values` nor `option_list` is set. At least "
            "one must have a value.")

    if not isinstance(burn_values, (list, tuple)):
        raise ValueError(
            "`burn_values` is not a list/tuple, the value passed is '%s'" %
            repr(burn_values))
    if not isinstance(option_list, (list, tuple)):
        raise ValueError(
            "`option_list` is not a list/tuple, the value passed is '%s'" %
            repr(option_list))

    vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    layer = vector.GetLayer(layer_id)

    if where_clause:
        layer.SetAttributeFilter(where_clause)

    try:
        result = gdal.RasterizeLayer(
            raster, [1], layer, burn_values=burn_values,
            options=option_list, callback=rasterize_callback)
    except Exception:
        # something bad happened, but still clean up
        # this case came out of a flaky test condition where the raster
        # would still be in use by the rasterize layer function
        LOGGER.exception('bad error on rasterizelayer')
        result = -1

    layer = None
    vector = None

    if result != 0:
        # need this __swig_destroy__ because we sometimes encounter a flaky
        # test where the path to the raster cannot be cleaned up because it
        # is still in use somewhere, likely a bug in gdal.RasterizeLayer;
        # note it is only invoked if there is a serious error
        gdal.Dataset.__swig_destroy__(raster)
        raise RuntimeError('Rasterize returned a nonzero exit code.')

    raster = None
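# Illustrative usage sketch (not part of the original module): burns a
# hypothetical vector attribute into an existing raster, mirroring the
# Example in the docstring above. The target raster must already exist,
# e.g. one created via new_raster_from_base; both paths are invented.
def _example_rasterize():
    """Sketch: burn the 'npv' attribute into every touched pixel."""
    rasterize(
        'parcels.shp', 'npv_raster.tif',  # hypothetical paths
        option_list=['ATTRIBUTE=npv', 'ALL_TOUCHED=TRUE'])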
[docs]def calculate_disjoint_polygon_set(
        vector_path, layer_id=0, bounding_box=None):
    """Create a sequence of sets of polygons that don't overlap.

    Determining the minimal number of those sets is an NP-complete problem,
    so this is a greedy approximation that builds up maximal subsets.

    Args:
        vector_path (string): a path to an OGR vector.
        layer_id (str/int): name or index of underlying layer in
            ``vector_path`` to calculate disjoint set. Defaults to 0.
        bounding_box (sequence): sequence of floats representing a bounding
            box to filter any polygons by. If a feature in ``vector_path``
            does not intersect this bounding box it will not be considered
            in the disjoint calculation. Coordinates are in the order
            [minx, miny, maxx, maxy].

    Return:
        subset_list (sequence): sequence of sets of FIDs from vector_path

    """
    vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    vector_layer = vector.GetLayer(layer_id)
    feature_count = vector_layer.GetFeatureCount()

    if feature_count == 0:
        raise RuntimeError(
            'Vector must have geometries but does not: %s' % vector_path)

    last_time = time.time()
    LOGGER.info("build shapely polygon list")
    if bounding_box is None:
        bounding_box = get_vector_info(vector_path)['bounding_box']
    prepared_bounding_box = shapely.prepared.prep(
        shapely.geometry.box(*bounding_box))

    shapely_polygon_lookup = {}
    for poly_feat in vector_layer:
        poly_geom_ref = poly_feat.GetGeometryRef()
        if poly_geom_ref is None:
            LOGGER.warning(
                f'no geometry in {vector_path} FID: {poly_feat.GetFID()}, '
                'skipping...')
            continue
        if poly_geom_ref.IsEmpty():
            LOGGER.warning(
                f'empty geometry in {vector_path} FID: '
                f'{poly_feat.GetFID()}, skipping...')
            continue
        # with GDAL>=3.3.0 ExportToWkb returns a bytearray instead of bytes
        shapely_polygon_lookup[poly_feat.GetFID()] = (
            shapely.wkb.loads(bytes(poly_geom_ref.ExportToWkb())))
        poly_geom_ref = None
    poly_feat = None

    LOGGER.info("build shapely rtree index")
    r_tree_index_stream = [
        (poly_fid, poly.bounds, None)
        for poly_fid, poly in shapely_polygon_lookup.items()
        if prepared_bounding_box.intersects(poly)]
    if r_tree_index_stream:
        poly_rtree_index = rtree.index.Index(r_tree_index_stream)
    else:
        LOGGER.warning("no polygons intersected the bounding box")
        return []

    vector_layer = None
    vector = None

    LOGGER.info(
        'poly feature lookup 100.0%% complete on %s',
        os.path.basename(vector_path))

    LOGGER.info('build poly intersection lookup')
    poly_intersect_lookup = collections.defaultdict(set)
    for poly_index, (poly_fid, poly_geom) in enumerate(
            shapely_polygon_lookup.items()):
        last_time = _invoke_timed_callback(
            last_time, lambda: LOGGER.info(
                "poly intersection lookup approximately %.1f%% complete "
                "on %s", 100.0 * float(poly_index+1) / len(
                    shapely_polygon_lookup),
                os.path.basename(vector_path)),
            _LOGGING_PERIOD)
        possible_intersection_set = list(poly_rtree_index.intersection(
            poly_geom.bounds))
        # no reason to prep the polygon to intersect itself
        if len(possible_intersection_set) > 1:
            polygon = shapely.prepared.prep(poly_geom)
        else:
            polygon = poly_geom
        for intersect_poly_fid in possible_intersection_set:
            if intersect_poly_fid == poly_fid or polygon.intersects(
                    shapely_polygon_lookup[intersect_poly_fid]):
                poly_intersect_lookup[poly_fid].add(intersect_poly_fid)
        polygon = None
    LOGGER.info(
        'poly intersection feature lookup 100.0%% complete on %s',
        os.path.basename(vector_path))

    # Build maximal subsets
    subset_list = []
    while len(poly_intersect_lookup) > 0:
        # sort polygons by increasing number of intersections
        intersections_list = [
            (len(poly_intersect_set), poly_fid, poly_intersect_set)
            for poly_fid, poly_intersect_set
            in poly_intersect_lookup.items()]
        intersections_list.sort()

        # build maximal subset
        maximal_set = set()
        for _, poly_fid, poly_intersect_set in intersections_list:
            last_time = _invoke_timed_callback(
                last_time, lambda: LOGGER.info(
                    "maximal subset build approximately %.1f%% complete "
                    "on %s", 100.0 * float(
                        feature_count - len(poly_intersect_lookup)) /
                    feature_count, os.path.basename(vector_path)),
                _LOGGING_PERIOD)
            if not poly_intersect_set.intersection(maximal_set):
                # no intersection, add poly_fid to the maximal set and
                # remove the polygon from the lookup
                maximal_set.add(poly_fid)
                del poly_intersect_lookup[poly_fid]
        # remove all the polygons from intersections once they're computed
        for poly_fid, poly_intersect_set in poly_intersect_lookup.items():
            poly_intersect_lookup[poly_fid] = (
                poly_intersect_set.difference(maximal_set))
        subset_list.append(maximal_set)
    LOGGER.info(
        'maximal subset build 100.0%% complete on %s',
        os.path.basename(vector_path))
    return subset_list
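# Illustrative usage sketch (not part of the original module): the disjoint
# sets let overlapping polygons be processed in separate passes so no pass
# handles two features that overlap each other. The vector path is
# hypothetical.
def _example_disjoint_sets():
    """Sketch: iterate over non-overlapping FID sets of a vector."""
    for set_index, fid_set in enumerate(
            calculate_disjoint_polygon_set('overlapping_polygons.gpkg')):
        LOGGER.info(
            "pass %d can safely process %d features", set_index, len(fid_set))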
[docs]def distance_transform_edt(
        base_region_raster_path_band, target_distance_raster_path,
        sampling_distance=(1., 1.), working_dir=None,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Calculate the euclidean distance transform on base raster.

    Calculates the euclidean distance transform on the base raster in units
    of pixels multiplied by an optional scalar constant. The implementation
    is based on the algorithm described in: Meijster, Arnold, Jos BTM
    Roerdink, and Wim H. Hesselink. "A general algorithm for computing
    distance transforms in linear time." Mathematical Morphology and its
    applications to image and signal processing. Springer, Boston, MA, 2002.
    331-340.

    The base mask raster represents the area to distance transform from as
    any pixel that is not 0 or nodata. It is computationally convenient to
    calculate the distance transform on the entire raster irrespective of
    nodata placement and thus produces a raster that will have distance
    transform values even in pixels that are nodata in the base.

    Args:
        base_region_raster_path_band (tuple): a tuple including file path to
            a raster and the band index to define the base region pixels.
            Any pixel that is not 0 and not nodata is considered to be part
            of the region.
        target_distance_raster_path (string): path to the target raster that
            is the exact euclidean distance transform from any pixel in the
            base raster that is not nodata and not 0. The units are in
            ``(pixel distance * sampling_distance)``.
        sampling_distance (tuple/list): an optional parameter used to scale
            the pixel distances when calculating the distance transform.
            Defaults to (1.0, 1.0). First element indicates the distance
            traveled in the x direction when changing a column index, and
            the second element in y when changing a row index. Both values
            must be > 0.
        working_dir (string): If not None, indicates where temporary files
            should be created during this run.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        None

    """
    working_raster_paths = {}
    for raster_prefix in ['region_mask_raster', 'g_raster']:
        with tempfile.NamedTemporaryFile(
                prefix=raster_prefix, suffix='.tif', delete=False,
                dir=working_dir) as tmp_file:
            working_raster_paths[raster_prefix] = tmp_file.name
    nodata = (get_raster_info(base_region_raster_path_band[0])['nodata'])[
        base_region_raster_path_band[1]-1]
    nodata_out = 255

    def mask_op(base_array):
        """Convert base_array to 1 where not 0 and not nodata, else 0."""
        if nodata is not None:
            return ~numpy.isclose(base_array, nodata) & (base_array != 0)
        else:
            return base_array != 0

    if not isinstance(sampling_distance, (tuple, list)):
        raise ValueError(
            "`sampling_distance` should be a tuple/list, instead it's %s" % (
                type(sampling_distance)))

    sample_d_x, sample_d_y = sampling_distance
    if sample_d_x <= 0. or sample_d_y <= 0.:
        raise ValueError(
            "Sample distances must be > 0.0, instead got %s" % (
                sampling_distance,))

    raster_calculator(
        [base_region_raster_path_band], mask_op,
        working_raster_paths['region_mask_raster'], gdal.GDT_Byte,
        nodata_out, calc_raster_stats=False,
        raster_driver_creation_tuple=raster_driver_creation_tuple)
    geoprocessing_core._distance_transform_edt(
        working_raster_paths['region_mask_raster'],
        working_raster_paths['g_raster'], sampling_distance[0],
        sampling_distance[1], target_distance_raster_path,
        raster_driver_creation_tuple)

    for path in working_raster_paths.values():
        try:
            os.remove(path)
        except OSError:
            LOGGER.warning("couldn't remove file %s", path)
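# Illustrative usage sketch (not part of the original module): computes
# distance in meters from a hypothetical stream mask raster whose pixels are
# 30 m on a side. Both paths are invented.
def _example_distance_transform():
    """Sketch: euclidean distance in meters from nonzero stream pixels."""
    distance_transform_edt(
        ('streams_mask.tif', 1), 'dist_to_stream_m.tif',
        sampling_distance=(30.0, 30.0))  # x/y pixel lengths in meters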
def _next_regular(base):
    """Find the next regular number greater than or equal to base.

    Regular numbers are composites of the prime factors 2, 3, and 5. Also
    known as 5-smooth numbers or Hamming numbers, these are the optimal size
    for inputs to FFTPACK.

    This source was taken directly from scipy.signal.signaltools and saves
    us from having to access a protected member in a library that could
    change in future releases:
    https://github.com/scipy/scipy/blob/v0.17.1/scipy/signal/signaltools.py#L211

    Args:
        base (int): a positive integer from which to start the search for
            the next Hamming number.

    Return:
        The next regular number greater than or equal to ``base``.

    """
    if base <= 6:
        return base

    # Quickly check if it's already a power of 2
    if not (base & (base-1)):
        return base

    match = float('inf')  # Anything found will be smaller
    p5 = 1
    while p5 < base:
        p35 = p5
        while p35 < base:
            # Ceiling integer division, avoiding conversion to float
            # (quotient = ceil(base / p35))
            quotient = -(-base // p35)

            # Quickly find next power of 2 >= quotient
            p2 = 2**((quotient - 1).bit_length())

            N = p2 * p35
            if N == base:
                return N
            elif N < match:
                match = N
            p35 *= 3
            if p35 == base:
                return p35
        if p35 < match:
            match = p35
        p5 *= 5
        if p5 == base:
            return p5
    if p5 < match:
        match = p5
    return match
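# Illustrative check (not part of the original module): the first few
# 5-smooth numbers are 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, ...; _next_regular
# rounds a size up to one of them for efficient FFTs.
def _example_next_regular():
    """Sketch: pad an FFT length up to the next 5-smooth number."""
    for n in (7, 13, 17, 100):
        LOGGER.debug("next regular >= %d is %d", n, _next_regular(n))
    # e.g. _next_regular(7) == 8, _next_regular(13) == 15,
    # _next_regular(17) == 18, _next_regular(100) == 100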
[docs]def convolve_2d(
        signal_path_band, kernel_path_band, target_path,
        ignore_nodata_and_edges=False, mask_nodata=True,
        normalize_kernel=False, target_datatype=gdal.GDT_Float64,
        target_nodata=None, working_dir=None, set_tol_to_zero=1e-8,
        max_timeout=_MAX_TIMEOUT,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Convolve 2D kernel over 2D signal.

    Convolves the raster in ``kernel_path_band`` over ``signal_path_band``.
    Nodata values are treated as 0.0 during the convolution and masked to
    nodata for the output result where ``signal_path`` has nodata.

    Note with default values, boundary effects can be seen in the result
    where the kernel would hang off the edge of the raster or in regions
    with nodata pixels. The function would treat these areas as values with
    "0.0" by default thus pulling the total convolution down in these areas.
    This is similar to setting ``mode='same'`` in Numpy's ``convolve``
    function:
    https://numpy.org/doc/stable/reference/generated/numpy.convolve.html

    This boundary effect can be avoided by setting
    ``ignore_nodata_and_edges=True`` which normalizes the target result by
    dynamically accounting for the number of valid signal pixels the kernel
    overlapped during the convolution step.

    Args:
        signal_path_band (tuple): a 2 tuple of the form
            (filepath to signal raster, band index).
        kernel_path_band (tuple): a 2 tuple of the form
            (filepath to kernel raster, band index), all pixel values should
            be valid -- output is not well defined if the kernel raster has
            nodata values.
        target_path (string): filepath to target raster that's the
            convolution of signal with kernel. Output will be a single band
            raster of same size and projection as ``signal_path_band``. Any
            nodata pixels that align with ``signal_path_band`` will be set
            to nodata.
        ignore_nodata_and_edges (boolean): If true, any pixels that are
            equal to ``signal_path_band``'s nodata value or signal pixels
            where the kernel extends beyond the edge of the raster are not
            included when averaging the convolution filter. This has the
            effect of "spreading" the result as though nodata and edges
            beyond the bounds of the raster are 0s. If set to false this
            tends to "pull" the signal away from nodata holes or raster
            edges. Set this value to ``True`` to avoid distorting signal
            values near edges for large integrating kernels.
            It can be useful to set this value to ``True`` to fill nodata
            holes through distance weighted averaging. In this case
            ``mask_nodata`` must be set to ``False`` so the result does not
            mask out these areas which are filled in. When using this
            technique be careful of cases where the kernel does not extend
            over any areas except nodata holes, in this case the resulting
            values in these areas will be nonsensical numbers, perhaps
            numerical infinity or NaNs.
        normalize_kernel (boolean): If true, the result is divided by the
            sum of the kernel.
        mask_nodata (boolean): If true, ``target_path`` raster's output is
            nodata where ``signal_path_band``'s pixels were nodata. Note that
            setting ``ignore_nodata_and_edges`` to ``True`` while setting
            ``mask_nodata`` to ``False`` can allow for a technique involving
            distance weighted averaging to define areas that would otherwise
            be nodata. Be careful in cases where the kernel does not extend
            over any valid non-nodata area since the result can be numerical
            infinity or NaNs.
        target_datatype (GDAL type): a GDAL raster type to set the output
            raster type to, as well as the type to calculate the convolution
            in. Defaults to GDT_Float64. Note signed byte is not supported.
        target_nodata (int/float): nodata value to set on output raster.
            If ``target_datatype`` is not gdal.GDT_Float64, this value must
            be set. Otherwise defaults to the minimum value of a float32.
        working_dir (string): If not None, indicates where temporary files
            should be created during this run.
        set_tol_to_zero (float): any value within +- this from 0.0 will get
            set to 0.0. This is to handle numerical roundoff errors that
            sometimes result in "numerical zero", such as -1.782e-18 that
            cannot be tolerated by users of this function. If `None` no
            adjustment will be done to output values.
        max_timeout (float): maximum amount of time to wait for worker
            thread to terminate.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        ``None``

    Raises:
        ValueError: if ``target_datatype`` is not ``gdal.GDT_Float64`` and
            ``target_nodata`` is not set.
        ValueError: if ``signal_path_band`` or ``kernel_path_band`` is a row
            based blocksize which would result in slow runtimes due to gdal
            cache thrashing.

    """
    if target_datatype is not gdal.GDT_Float64 and target_nodata is None:
        raise ValueError(
            "`target_datatype` is set, but `target_nodata` is None. "
            "`target_nodata` must be set if `target_datatype` is not "
            "`gdal.GDT_Float64`.")

    if target_nodata is None:
        target_nodata = float(numpy.finfo(numpy.float32).min)

    if ignore_nodata_and_edges and not mask_nodata:
        LOGGER.debug(
            'ignore_nodata_and_edges is True while mask_nodata is False -- '
            'this can yield a nonsensical result in areas where the kernel '
            'touches only nodata values.')

    bad_raster_path_list = []
    for raster_id, raster_path_band in [
            ('signal', signal_path_band), ('kernel', kernel_path_band)]:
        if not _is_raster_path_band_formatted(raster_path_band):
            bad_raster_path_list.append((raster_id, raster_path_band))
    if bad_raster_path_list:
        raise ValueError(
            "Expected raster path band sequences for the following arguments "
            f"but instead got: {bad_raster_path_list}")

    signal_raster_info = get_raster_info(signal_path_band[0])
    kernel_raster_info = get_raster_info(kernel_path_band[0])

    for raster_path_band, info_dict in (
            (signal_path_band, signal_raster_info),
            (kernel_path_band, kernel_raster_info)):
        if 1 in info_dict['block_size']:
            raise ValueError(
                f'{raster_path_band} has a row blocksize which can make '
                f'this function run very slow, create a square blocksize '
                f'using `warp_raster` or `align_and_resize_raster_stack` '
                f'which creates square blocksizes by default')

    # The nodata value is reset to a different value at the end of this
    # function. Here 0 is chosen as a default value since data are
    # incrementally added to the raster
    new_raster_from_base(
        signal_path_band[0], target_path, target_datatype, [0],
        raster_driver_creation_tuple=raster_driver_creation_tuple)

    n_cols_signal, n_rows_signal = signal_raster_info['raster_size']
    n_cols_kernel, n_rows_kernel = kernel_raster_info['raster_size']
    s_path_band = signal_path_band
    k_path_band = kernel_path_band
    s_nodata = signal_raster_info['nodata'][0]

    # we need the original signal raster info because we want the output to
    # be clipped and NODATA masked to it
    signal_raster = gdal.OpenEx(signal_path_band[0], gdal.OF_RASTER)
    signal_band = signal_raster.GetRasterBand(signal_path_band[1])

    # getting the offset list before it's opened for updating
    target_offset_list = list(iterblocks((target_path, 1), offset_only=True))
    target_raster = gdal.OpenEx(target_path, gdal.OF_RASTER | gdal.GA_Update)
    target_band = target_raster.GetRasterBand(1)

    # if we're ignoring nodata, we need to make a parallel convolved signal
    # of the nodata mask
    if ignore_nodata_and_edges:
        raster_file, mask_raster_path = tempfile.mkstemp(
            suffix='.tif', prefix='convolved_mask',
            dir=os.path.dirname(target_path))
        os.close(raster_file)
        new_raster_from_base(
            signal_path_band[0], mask_raster_path, gdal.GDT_Float64, [0.0],
            raster_driver_creation_tuple=raster_driver_creation_tuple)
        mask_raster = gdal.OpenEx(
            mask_raster_path, gdal.GA_Update | gdal.OF_RASTER)
        mask_band = mask_raster.GetRasterBand(1)

    LOGGER.info('starting convolve')
    last_time = time.time()

    # calculate the kernel sum for normalization
    kernel_nodata = kernel_raster_info['nodata'][0]
    kernel_sum = 0.0
    for _, kernel_block in iterblocks(kernel_path_band):
        if kernel_nodata is not None and ignore_nodata_and_edges:
            kernel_block[numpy.isclose(kernel_block, kernel_nodata)] = 0.0
        kernel_sum += numpy.sum(kernel_block)

    # limit the size of the work queue since a large kernel / signal with
    # small block size can have a large memory impact when queuing offset
    # lists.
    work_queue = queue.Queue(10)
    signal_offset_list = list(iterblocks(s_path_band, offset_only=True))
    kernel_offset_list = list(iterblocks(k_path_band, offset_only=True))
    n_blocks = len(signal_offset_list) * len(kernel_offset_list)

    LOGGER.debug('start fill work queue thread')

    def _fill_work_queue():
        """Asynchronously fill the work queue."""
        LOGGER.debug('fill work queue')
        for signal_offset in signal_offset_list:
            for kernel_offset in kernel_offset_list:
                work_queue.put((signal_offset, kernel_offset))
        work_queue.put(None)
        LOGGER.debug('work queue full')

    fill_work_queue_worker = threading.Thread(
        target=_fill_work_queue)
    fill_work_queue_worker.daemon = True
    fill_work_queue_worker.start()

    # limit the size of the write queue so we don't accidentally load a
    # whole array into memory
    LOGGER.debug('start worker thread')
    write_queue = queue.Queue(10)
    worker = threading.Thread(
        target=_convolve_2d_worker,
        args=(
            signal_path_band, kernel_path_band,
            ignore_nodata_and_edges, normalize_kernel,
            set_tol_to_zero, work_queue, write_queue))
    worker.daemon = True
    worker.start()

    n_blocks_processed = 0
    LOGGER.info(f'{n_blocks} sent to workers, wait for worker results')
    while True:
        # the timeout guards against a worst case scenario where the
        # ``_convolve_2d_worker`` has crashed.
        write_payload = write_queue.get(timeout=_MAX_TIMEOUT)
        if write_payload:
            (index_dict, result, mask_result,
             left_index_raster, right_index_raster,
             top_index_raster, bottom_index_raster,
             left_index_result, right_index_result,
             top_index_result, bottom_index_result) = write_payload
        else:
            worker.join(max_timeout)
            break

        output_array = numpy.empty(
            (index_dict['win_ysize'], index_dict['win_xsize']),
            dtype=numpy.float32)

        # the initial data value in target_band is 0 because that is the
        # temporary nodata selected so that manual resetting of initial
        # data values wasn't necessary. at the end of this function the
        # target nodata value is set to `target_nodata`.
        current_output = target_band.ReadAsArray(**index_dict)

        # read the signal block so we know where the nodata are
        potential_nodata_signal_array = signal_band.ReadAsArray(**index_dict)
        valid_mask = numpy.ones(
            potential_nodata_signal_array.shape, dtype=bool)
        # guard against a None nodata value
        if s_nodata is not None and mask_nodata:
            valid_mask[:] = (
                ~numpy.isclose(potential_nodata_signal_array, s_nodata))
        output_array[:] = target_nodata
        output_array[valid_mask] = (
            (result[top_index_result:bottom_index_result,
                    left_index_result:right_index_result])[valid_mask] +
            current_output[valid_mask])

        target_band.WriteArray(
            output_array, xoff=index_dict['xoff'],
            yoff=index_dict['yoff'])

        if ignore_nodata_and_edges:
            # we'll need to save off the mask convolution so we can divide
            # it in total later
            current_mask = mask_band.ReadAsArray(**index_dict)
            output_array[valid_mask] = (
                (mask_result[
                    top_index_result:bottom_index_result,
                    left_index_result:right_index_result])[valid_mask] +
                current_mask[valid_mask])
            mask_band.WriteArray(
                output_array, xoff=index_dict['xoff'],
                yoff=index_dict['yoff'])

        n_blocks_processed += 1
        last_time = _invoke_timed_callback(
            last_time, lambda: LOGGER.info(
                "convolution worker approximately %.1f%% complete on %s",
                100.0 * float(n_blocks_processed) / (n_blocks),
                os.path.basename(target_path)),
            _LOGGING_PERIOD)

    LOGGER.info(
        f"convolution worker 100.0% complete on "
        f"{os.path.basename(target_path)}")
    target_band.FlushCache()

    if ignore_nodata_and_edges:
        signal_nodata = get_raster_info(signal_path_band[0])['nodata'][
            signal_path_band[1]-1]
        LOGGER.info(
            "need to normalize result so nodata values are not included")
        mask_pixels_processed = 0
        mask_band.FlushCache()
        for target_offset_data in target_offset_list:
            target_block = target_band.ReadAsArray(
                **target_offset_data).astype(numpy.float64)
            signal_block = signal_band.ReadAsArray(**target_offset_data)
            mask_block = mask_band.ReadAsArray(**target_offset_data)
            if mask_nodata and signal_nodata is not None:
                valid_mask = ~numpy.isclose(signal_block, signal_nodata)
            else:
                valid_mask = numpy.ones(target_block.shape, dtype=bool)
            valid_mask &= (mask_block > 0)
            # divide the target_band by the mask_band
            target_block[valid_mask] /= mask_block[valid_mask].astype(
                numpy.float64)

            # scale by kernel sum if necessary since mask division will
            # automatically normalize kernel
            if not normalize_kernel:
                target_block[valid_mask] *= kernel_sum

            target_band.WriteArray(
                target_block, xoff=target_offset_data['xoff'],
                yoff=target_offset_data['yoff'])
            mask_pixels_processed += target_block.size
            last_time = _invoke_timed_callback(
                last_time, lambda: LOGGER.info(
                    "convolution nodata normalizer approximately %.1f%% "
                    "complete on %s",
                    100.0 * float(mask_pixels_processed) / (
                        n_cols_signal * n_rows_signal),
                    os.path.basename(target_path)),
                _LOGGING_PERIOD)
        mask_raster = None
        mask_band = None
        os.remove(mask_raster_path)

        LOGGER.info(
f"convolution nodata normalize 100.0% complete on " f"{os.path.basename(target_path)}") # set the nodata value from 0 to a reasonable value for the result target_band.SetNoDataValue(target_nodata) target_band = None target_raster = None
[docs]def iterblocks(
        raster_path_band, largest_block=_LARGEST_ITERBLOCK,
        offset_only=False):
    """Iterate across all the memory blocks in the input raster.

    Result is a generator of block location information and numpy arrays.

    This is especially useful when a single value needs to be derived from
    the pixel values in a raster, such as the sum total of all pixel values,
    or a sequence of unique raster values. In such cases,
    ``raster_calculator`` is overkill, since it writes out a raster.

    As a generator, this can be combined multiple times with ``zip()`` to
    iterate 'simultaneously' over multiple rasters, though the user should
    be careful to do so only with prealigned rasters.

    Args:
        raster_path_band (tuple): a path/band index tuple to indicate
            which raster band iterblocks should iterate over.
        largest_block (int): Attempts to iterate over raster blocks with
            this many elements. Useful in cases where the blocksize is
            relatively small, memory is available, and the function call
            overhead dominates the iteration. Defaults to 2**16. A value of
            anything less than the original blocksize of the raster will
            result in blocksizes equal to the original size.
        offset_only (boolean): defaults to False, if True ``iterblocks``
            only returns offset dictionary and doesn't read any binary data
            from the raster. This can be useful when iterating over writing
            to an output.

    Yields:
        If ``offset_only`` is false, on each iteration, a tuple containing a
        dict of block data and a 2-dimensional numpy array are yielded. The
        dict of block data has these attributes:

        * ``data['xoff']`` - The X offset of the upper-left-hand corner of
          the block.
        * ``data['yoff']`` - The Y offset of the upper-left-hand corner of
          the block.
        * ``data['win_xsize']`` - The width of the block.
        * ``data['win_ysize']`` - The height of the block.

        If ``offset_only`` is True, the function returns only the block
        offset data and does not attempt to read binary data from the
        raster.

    """
    if not _is_raster_path_band_formatted(raster_path_band):
        raise ValueError(
            "`raster_path_band` not formatted as expected. Expects "
            "(path, band_index), received %s" % repr(raster_path_band))
    raster = gdal.OpenEx(raster_path_band[0], gdal.OF_RASTER)
    if raster is None:
        raise ValueError(
            "Raster at %s could not be opened." % raster_path_band[0])
    band = raster.GetRasterBand(raster_path_band[1])
    block = band.GetBlockSize()
    cols_per_block = block[0]
    rows_per_block = block[1]

    n_cols = raster.RasterXSize
    n_rows = raster.RasterYSize

    block_area = cols_per_block * rows_per_block
    # try to make block wider
    if int(largest_block / block_area) > 0:
        width_factor = int(largest_block / block_area)
        cols_per_block *= width_factor
        if cols_per_block > n_cols:
            cols_per_block = n_cols
        block_area = cols_per_block * rows_per_block
    # try to make block taller
    if int(largest_block / block_area) > 0:
        height_factor = int(largest_block / block_area)
        rows_per_block *= height_factor
        if rows_per_block > n_rows:
            rows_per_block = n_rows

    n_col_blocks = int(math.ceil(n_cols / float(cols_per_block)))
    n_row_blocks = int(math.ceil(n_rows / float(rows_per_block)))

    for row_block_index in range(n_row_blocks):
        row_offset = row_block_index * rows_per_block
        row_block_width = n_rows - row_offset
        if row_block_width > rows_per_block:
            row_block_width = rows_per_block

        for col_block_index in range(n_col_blocks):
            col_offset = col_block_index * cols_per_block
            col_block_width = n_cols - col_offset
            if col_block_width > cols_per_block:
                col_block_width = cols_per_block

            offset_dict = {
                'xoff': col_offset,
                'yoff': row_offset,
                'win_xsize': col_block_width,
                'win_ysize': row_block_width,
            }
            if offset_only:
                yield offset_dict
            else:
                yield (offset_dict, band.ReadAsArray(**offset_dict))

    band = None
    raster = None
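# Illustrative usage sketch (not part of the original module): computes the
# sum of all valid pixels without loading the whole raster into memory. The
# raster path is hypothetical.
def _example_iterblocks_sum():
    """Sketch: block-wise sum of non-nodata pixels."""
    raster_path_band = ('dem.tif', 1)  # hypothetical
    nodata = get_raster_info(raster_path_band[0])['nodata'][0]
    running_sum = 0.0
    for _, block in iterblocks(raster_path_band):
        if nodata is not None:
            block = block[~numpy.isclose(block, nodata)]
        running_sum += numpy.sum(block)
    LOGGER.info("total of valid pixels: %f", running_sum)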
[docs]def transform_bounding_box(
        bounding_box, base_projection_wkt, target_projection_wkt,
        edge_samples=11,
        osr_axis_mapping_strategy=DEFAULT_OSR_AXIS_MAPPING_STRATEGY):
    """Transform input bounding box to output projection.

    This transform accounts for the fact that the reprojected square
    bounding box might be warped in the new coordinate system. To account
    for this, the function samples points along the original bounding box
    edges and attempts to make the largest bounding box around any
    transformed point on the edge whether corners or warped edges.

    Args:
        bounding_box (sequence): a sequence of 4 coordinates in the base
            coordinate system describing the bound in the order
            [xmin, ymin, xmax, ymax].
        base_projection_wkt (string): the spatial reference of the input
            coordinate system in Well Known Text.
        target_projection_wkt (string): the spatial reference of the desired
            output coordinate system in Well Known Text.
        edge_samples (int): the number of interpolated points along each
            bounding box edge to sample along. A value of 2 will sample just
            the corners while a value of 3 will sample the corners and the
            edge midpoints.
        osr_axis_mapping_strategy (int): OSR axis mapping strategy for
            ``SpatialReference`` objects. Defaults to
            ``geoprocessing.DEFAULT_OSR_AXIS_MAPPING_STRATEGY``. This
            parameter should not be changed unless you know what you are
            doing.

    Return:
        A list of the form [xmin, ymin, xmax, ymax] that describes the
        largest fitting bounding box around the original warped bounding box
        in the target coordinate system.

    Raises:
        ``ValueError`` if the resulting transform yields non-finite
        coordinates. This would indicate an ill-posed transform region that
        the user should address.

    """
    base_ref = osr.SpatialReference()
    base_ref.ImportFromWkt(base_projection_wkt)

    target_ref = osr.SpatialReference()
    target_ref.ImportFromWkt(target_projection_wkt)

    base_ref.SetAxisMappingStrategy(osr_axis_mapping_strategy)
    target_ref.SetAxisMappingStrategy(osr_axis_mapping_strategy)

    # Create a coordinate transformation
    transformer = osr.CreateCoordinateTransformation(base_ref, target_ref)

    def _transform_point(point):
        """Transform an (x,y) point tuple from base_ref to target_ref."""
        trans_x, trans_y, _ = (transformer.TransformPoint(*point))
        return (trans_x, trans_y)

    # The following list comprehension iterates over each edge of the
    # bounding box, divides each edge into ``edge_samples`` number of
    # points, then reduces that list to an appropriate ``bounding_fn`` given
    # the edge. For example the left edge needs to be the minimum x
    # coordinate so we generate ``edge_samples`` number of points between
    # the upper left and lower left point, transform them all to the new
    # coordinate system then get the minimum x coordinate "min(p[0] ...)" of
    # the batch.
    # points are numbered starting at the upper left and proceeding
    # counterclockwise as follows:
    # 0--3
    # |  |
    # 1--2
    p_0 = numpy.array((bounding_box[0], bounding_box[3]))
    p_1 = numpy.array((bounding_box[0], bounding_box[1]))
    p_2 = numpy.array((bounding_box[2], bounding_box[1]))
    p_3 = numpy.array((bounding_box[2], bounding_box[3]))
    raw_bounding_box = [
        bounding_fn(
            [_transform_point(
                p_a * v + p_b * (1 - v)) for v in numpy.linspace(
                    0, 1, edge_samples)])
        for p_a, p_b, bounding_fn in [
            (p_0, p_1, lambda p_list: min([p[0] for p in p_list])),
            (p_1, p_2, lambda p_list: min([p[1] for p in p_list])),
            (p_2, p_3, lambda p_list: max([p[0] for p in p_list])),
            (p_3, p_0, lambda p_list: max([p[1] for p in p_list]))]]

    # sometimes a transform will be so tight that a sampling around it may
    # flip the coordinate system. This flips it back. I found this when
    # transforming the bounding box of Gibraltar in a utm coordinate system
    # to lat/lng.
    minx, maxx = sorted([raw_bounding_box[0], raw_bounding_box[2]])
    miny, maxy = sorted([raw_bounding_box[1], raw_bounding_box[3]])
    transformed_bounding_box = [minx, miny, maxx, maxy]

    if not all(numpy.isfinite(numpy.array(transformed_bounding_box))):
        raise ValueError(
            f'Could not transform bounding box from base to target '
            f'projection. Some transformed coordinates are not finite: '
            f'{transformed_bounding_box}, base bounding box may not fit '
            f'into target coordinate projection system.\n'
            f'Original bounding box: {bounding_box}\n'
            f'Base projection: {base_projection_wkt}\n'
            f'Target projection: {target_projection_wkt}\n')
    return transformed_bounding_box
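# Illustrative usage sketch (not part of the original module): transforms a
# hypothetical UTM zone 10N bounding box to WGS84 lat/lng. The EPSG codes
# and extent are chosen for demonstration only.
def _example_transform_bb():
    """Sketch: express a UTM zone 10N bounding box in WGS84."""
    utm10n = osr.SpatialReference()
    utm10n.ImportFromEPSG(32610)
    wgs84 = osr.SpatialReference()
    wgs84.ImportFromEPSG(4326)
    bb_wgs84 = transform_bounding_box(
        [500000.0, 5000000.0, 510000.0, 5010000.0],  # hypothetical extent
        utm10n.ExportToWkt(), wgs84.ExportToWkt())
    LOGGER.info("WGS84 bounding box: %s", bb_wgs84)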
[docs]def mask_raster(
        base_raster_path_band, mask_vector_path, target_mask_raster_path,
        mask_layer_id=0, target_mask_value=None, working_dir=None,
        all_touched=False, where_clause=None,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Mask a raster band with a given vector.

    Args:
        base_raster_path_band (tuple): a (path, band number) tuple indicating
            the data to mask.
        mask_vector_path (str): path to a vector whose geometry is used to
            mask ``base_raster_path_band``; any pixel that does not overlap
            the geometry is set to ``target_mask_value`` if defined, or else
            to ``base_raster_path_band``'s nodata value.
        target_mask_raster_path (str): path to desired target raster that
            is a copy of ``base_raster_path_band`` except any pixels that do
            not intersect with ``mask_vector_path`` are set to
            ``target_mask_value`` or ``base_raster_path_band``'s nodata
            value if ``target_mask_value`` is None.
        mask_layer_id (str/int): an index or name to identify the mask
            geometry layer in ``mask_vector_path``, default is 0.
        target_mask_value (numeric): If not None, this value is written to
            any pixel in ``base_raster_path_band`` that does not intersect
            with ``mask_vector_path``. Otherwise the nodata value of
            ``base_raster_path_band`` is used.
        working_dir (str): this is a path to a directory that can be used to
            hold temporary files required to complete this operation.
        all_touched (bool): if False, a pixel is considered inside the mask
            only if its centroid intersects the mask geometry. If True, a
            pixel is considered inside if any point of the pixel intersects
            the geometry.
        where_clause (str): (optional) if not None, it is an SQL compatible
            where clause that can be used to filter the features that are
            used to mask the base raster.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to a GTiff driver
            tuple defined at
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        None

    """
    with tempfile.NamedTemporaryFile(
            prefix='mask_raster', delete=False, suffix='.tif',
            dir=working_dir) as mask_raster_file:
        mask_raster_path = mask_raster_file.name

    new_raster_from_base(
        base_raster_path_band[0], mask_raster_path, gdal.GDT_Byte, [255],
        fill_value_list=[0],
        raster_driver_creation_tuple=raster_driver_creation_tuple)

    base_raster_info = get_raster_info(base_raster_path_band[0])

    rasterize(
        mask_vector_path, mask_raster_path, burn_values=[1],
        layer_id=mask_layer_id,
        option_list=[('ALL_TOUCHED=%s' % all_touched).upper()],
        where_clause=where_clause)

    base_nodata = base_raster_info['nodata'][base_raster_path_band[1]-1]

    if target_mask_value is None:
        mask_value = base_nodata
        if mask_value is None:
            LOGGER.warning(
                "No mask value was passed and target nodata is undefined, "
                "defaulting to 0 as the target mask value.")
            mask_value = 0
    else:
        mask_value = target_mask_value

    def mask_op(base_array, mask_array):
        result = numpy.copy(base_array)
        result[mask_array == 0] = mask_value
        return result

    raster_calculator(
        [base_raster_path_band, (mask_raster_path, 1)], mask_op,
        target_mask_raster_path, base_raster_info['datatype'], base_nodata,
        raster_driver_creation_tuple=raster_driver_creation_tuple)
    os.remove(mask_raster_path)
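# Illustrative usage sketch (not part of the original module): sets every
# pixel outside a hypothetical study-area polygon to nodata. Both paths are
# invented.
def _example_mask():
    """Sketch: nodata-out everything outside a study area."""
    mask_raster(
        ('landcover.tif', 1), 'study_area.gpkg',  # hypothetical paths
        'landcover_clipped.tif', all_touched=True)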
def _invoke_timed_callback(
        reference_time, callback_lambda, callback_period):
    """Invoke callback if a certain amount of time has passed.

    This is a convenience function to standardize update callbacks from the
    module.

    Args:
        reference_time (float): time to base ``callback_period`` length from.
        callback_lambda (lambda): function to invoke if difference between
            current time and ``reference_time`` has exceeded
            ``callback_period``.
        callback_period (float): time in seconds to pass until
            ``callback_lambda`` is invoked.

    Return:
        ``reference_time`` if ``callback_lambda`` not invoked, otherwise the
        time when ``callback_lambda`` was invoked.

    """
    current_time = time.time()
    if current_time - reference_time > callback_period:
        callback_lambda()
        return current_time
    return reference_time


def _gdal_to_numpy_type(band):
    """Calculate the equivalent numpy datatype from a GDAL raster band type.

    This function doesn't handle complex or unknown types. If they are
    passed in, this function will raise a ValueError.

    Args:
        band (gdal.Band): GDAL Band

    Return:
        numpy_datatype (numpy.dtype): equivalent of band.DataType

    """
    # doesn't include GDT_Byte because that's a special case
    base_gdal_type_to_numpy = {
        gdal.GDT_Int16: numpy.int16,
        gdal.GDT_Int32: numpy.int32,
        gdal.GDT_UInt16: numpy.uint16,
        gdal.GDT_UInt32: numpy.uint32,
        gdal.GDT_Float32: numpy.float32,
        gdal.GDT_Float64: numpy.float64,
    }

    if band.DataType in base_gdal_type_to_numpy:
        return base_gdal_type_to_numpy[band.DataType]
    if band.DataType != gdal.GDT_Byte:
        raise ValueError("Unsupported DataType: %s" % str(band.DataType))

    # band must be GDT_Byte type, check if it is signed/unsigned
    metadata = band.GetMetadata('IMAGE_STRUCTURE')
    if 'PIXELTYPE' in metadata and metadata['PIXELTYPE'] == 'SIGNEDBYTE':
        return numpy.int8
    return numpy.uint8
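The timed-callback helper is meant to be called inside a long-running loop,
with the returned timestamp fed back in as the new reference time. A small
sketch of that pattern, run in this module's context (the loop body and
period are placeholders):

import time

last_log_time = time.time()
for step in range(1000):
    time.sleep(0.01)  # stand-in for real work
    # fires at most once per 5-second period; returns the new reference
    last_log_time = _invoke_timed_callback(
        last_log_time, lambda: LOGGER.info('on step %s', step), 5.0)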
def merge_bounding_box_list(bounding_box_list, bounding_box_mode):
    """Create a single bounding box by union or intersection of the list.

    Args:
        bounding_box_list (sequence): a sequence of bounding box coordinates
            in the order [minx, miny, maxx, maxy].
        bounding_box_mode (string): either ``'union'`` or ``'intersection'``
            for the corresponding reduction mode.

    Return:
        A four tuple bounding box that is the union or intersection of the
        input bounding boxes.

    Raises:
        ValueError if the bounding boxes in ``bounding_box_list`` do not
            intersect and ``bounding_box_mode`` is 'intersection'.

    """
    def _merge_bounding_boxes(bb1, bb2, mode):
        """Merge two bounding boxes through union or intersection.

        Args:
            bb1, bb2 (sequence): sequence of float representing bounding box
                in the form bb=[minx,miny,maxx,maxy]
            mode (string): one of 'union' or 'intersection'

        Return:
            Reduced bounding box of bb1/bb2 depending on mode.

        """
        def _less_than_or_equal(x_val, y_val):
            return x_val if x_val <= y_val else y_val

        def _greater_than(x_val, y_val):
            return x_val if x_val > y_val else y_val

        if mode == "union":
            comparison_ops = [
                _less_than_or_equal, _less_than_or_equal,
                _greater_than, _greater_than]
        if mode == "intersection":
            comparison_ops = [
                _greater_than, _greater_than,
                _less_than_or_equal, _less_than_or_equal]

        bb_out = [op(x, y) for op, x, y in zip(comparison_ops, bb1, bb2)]
        return bb_out

    result_bb = functools.reduce(
        functools.partial(_merge_bounding_boxes, mode=bounding_box_mode),
        bounding_box_list)
    if result_bb[0] > result_bb[2] or result_bb[1] > result_bb[3]:
        raise ValueError(
            "Bounding boxes do not intersect. Base list: %s mode: %s "
            "result: %s" % (bounding_box_list, bounding_box_mode, result_bb))
    return result_bb
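A quick sketch of both reduction modes on two overlapping boxes (values
chosen for illustration):

import pygeoprocessing

union_bb = pygeoprocessing.merge_bounding_box_list(
    [[0, 0, 10, 10], [5, 5, 20, 20]], 'union')         # [0, 0, 20, 20]
intersect_bb = pygeoprocessing.merge_bounding_box_list(
    [[0, 0, 10, 10], [5, 5, 20, 20]], 'intersection')  # [5, 5, 10, 10]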
def get_gis_type(path):
    """Calculate the GIS type of the file located at ``path``.

    Args:
        path (str): path to a file on disk.

    Return:
        A bitwise OR of all GIS types that PyGeoprocessing models, currently
        this is ``pygeoprocessing.UNKNOWN_TYPE``,
        ``pygeoprocessing.RASTER_TYPE``, or ``pygeoprocessing.VECTOR_TYPE``.

    """
    if not os.path.exists(path):
        # note: ValueError takes a formatted message, not logging-style args
        raise ValueError("%s does not exist" % path)
    from pygeoprocessing import UNKNOWN_TYPE
    gis_type = UNKNOWN_TYPE
    gis_raster = gdal.OpenEx(path, gdal.OF_RASTER)
    if gis_raster is not None:
        from pygeoprocessing import RASTER_TYPE
        gis_type |= RASTER_TYPE
        gis_raster = None
    gis_vector = gdal.OpenEx(path, gdal.OF_VECTOR)
    if gis_vector is not None:
        from pygeoprocessing import VECTOR_TYPE
        gis_type |= VECTOR_TYPE
    return gis_type
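Because the return value is a bitmask, callers should test membership with
bitwise AND rather than equality; a format like GeoPackage can report both
types at once. A minimal sketch with a hypothetical path:

import pygeoprocessing

gis_type = pygeoprocessing.get_gis_type('data.gpkg')
if gis_type & pygeoprocessing.RASTER_TYPE:
    print('contains raster data')
if gis_type & pygeoprocessing.VECTOR_TYPE:
    print('contains vector data')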
def _make_logger_callback(message):
    """Build a timed logger callback that prints ``message`` replaced.

    Args:
        message (string): a string that expects two ``%`` placeholders,
            first for the percent complete from ``df_complete``, second for
            ``p_progress_arg[0]``.

    Return:
        Function with signature:
            logger_callback(df_complete, psz_message, p_progress_arg)

    """
    def logger_callback(df_complete, _, p_progress_arg):
        """Argument names come from the GDAL API for callbacks."""
        try:
            current_time = time.time()
            if ((current_time - logger_callback.last_time) > 5.0 or
                    (df_complete == 1.0 and
                     logger_callback.total_time >= 5.0)):
                # In some multiprocess applications I was encountering a
                # ``p_progress_arg`` of None. This is unexpected and I
                # suspect was an issue for some kind of GDAL race condition.
                # So I'm guarding against it here and reporting an
                # appropriate log if it occurs.
                if p_progress_arg:
                    LOGGER.info(message, df_complete * 100, p_progress_arg[0])
                else:
                    LOGGER.info(message, df_complete * 100, '')
                # accumulate elapsed time since the last log before
                # resetting the reference time
                logger_callback.total_time += (
                    current_time - logger_callback.last_time)
                logger_callback.last_time = current_time
        except AttributeError:
            logger_callback.last_time = time.time()
            logger_callback.total_time = 0.0
        except Exception:
            LOGGER.exception(
                "Unhandled error occurred while logging progress. "
                "df_complete: %s, p_progress_arg: %s",
                df_complete, p_progress_arg)

    return logger_callback


def _is_raster_path_band_formatted(raster_path_band):
    """Return true if raster path band is a (str, int) tuple/list."""
    if not isinstance(raster_path_band, (list, tuple)):
        return False
    elif len(raster_path_band) != 2:
        return False
    elif not isinstance(raster_path_band[0], str):
        return False
    elif not isinstance(raster_path_band[1], int):
        return False
    else:
        return True


def _convolve_2d_worker(
        signal_path_band, kernel_path_band, ignore_nodata,
        normalize_kernel, set_tol_to_zero, work_queue, write_queue):
    """Worker function to be used by ``convolve_2d``.

    Args:
        signal_path_band (tuple): a 2 tuple of the form
            (filepath to signal raster, band index).
        kernel_path_band (tuple): a 2 tuple of the form
            (filepath to kernel raster, band index).
        ignore_nodata (boolean): If true, any pixels that are equal to
            ``signal_path_band``'s nodata value are not included when
            averaging the convolution filter.
        normalize_kernel (boolean): If true, the result is divided by the
            sum of the kernel.
        set_tol_to_zero (float): values within this tolerance of zero are
            set to zero.
        work_queue (Queue): will contain (signal_offset, kernel_offset)
            tuples that can be used to read raster blocks directly using
            GDAL ReadAsArray(**offset). Indicates the block to operate on.
        write_queue (Queue): mechanism to pass result back to the writer;
            contains an (index_dict, result, mask_result,
            left_index_raster, right_index_raster, top_index_raster,
            bottom_index_raster, left_index_result, right_index_result,
            top_index_result, bottom_index_result) tuple that's used for
            writing and masking.
    Return:
        None

    """
    signal_raster = gdal.OpenEx(signal_path_band[0], gdal.OF_RASTER)
    kernel_raster = gdal.OpenEx(kernel_path_band[0], gdal.OF_RASTER)
    signal_band = signal_raster.GetRasterBand(signal_path_band[1])
    kernel_band = kernel_raster.GetRasterBand(kernel_path_band[1])

    signal_raster_info = get_raster_info(signal_path_band[0])
    kernel_raster_info = get_raster_info(kernel_path_band[0])

    n_cols_signal, n_rows_signal = signal_raster_info['raster_size']
    n_cols_kernel, n_rows_kernel = kernel_raster_info['raster_size']
    signal_nodata = signal_raster_info['nodata'][0]
    kernel_nodata = kernel_raster_info['nodata'][0]

    mask_result = None  # in case no mask is needed, variable is still defined

    # calculate the kernel sum for normalization
    kernel_sum = 0.0
    for _, kernel_block in iterblocks(kernel_path_band):
        if kernel_nodata is not None and ignore_nodata:
            kernel_block[numpy.isclose(kernel_block, kernel_nodata)] = 0.0
        kernel_sum += numpy.sum(kernel_block)

    while True:
        payload = work_queue.get()
        if payload is None:
            break
        signal_offset, kernel_offset = payload

        # ensure signal and kernel are internally float64 precision
        # irrespective of their base type
        signal_block = signal_band.ReadAsArray(**signal_offset).astype(
            numpy.float64)
        kernel_block = kernel_band.ReadAsArray(**kernel_offset).astype(
            numpy.float64)

        # don't ever convolve the nodata value
        if signal_nodata is not None:
            signal_nodata_mask = numpy.isclose(signal_block, signal_nodata)
            signal_block[signal_nodata_mask] = 0.0
            if not ignore_nodata:
                signal_nodata_mask[:] = 0
        else:
            signal_nodata_mask = numpy.zeros(
                signal_block.shape, dtype=bool)

        left_index_raster = (
            signal_offset['xoff'] - n_cols_kernel // 2 +
            kernel_offset['xoff'])
        right_index_raster = (
            signal_offset['xoff'] - n_cols_kernel // 2 +
            kernel_offset['xoff'] + signal_offset['win_xsize'] +
            kernel_offset['win_xsize'] - 1)
        top_index_raster = (
            signal_offset['yoff'] - n_rows_kernel // 2 +
            kernel_offset['yoff'])
        bottom_index_raster = (
            signal_offset['yoff'] - n_rows_kernel // 2 +
            kernel_offset['yoff'] + signal_offset['win_ysize'] +
            kernel_offset['win_ysize'] - 1)

        # it's possible that this piece of the integrating kernel
        # doesn't affect the final result; if so, skip this block
        if (right_index_raster < 0 or
                bottom_index_raster < 0 or
                left_index_raster > n_cols_signal or
                top_index_raster > n_rows_signal):
            continue

        if kernel_nodata is not None:
            kernel_block[numpy.isclose(kernel_block, kernel_nodata)] = 0.0

        if normalize_kernel:
            kernel_block /= kernel_sum

        # determine the output convolve shape
        shape = (
            numpy.array(signal_block.shape) +
            numpy.array(kernel_block.shape) - 1)

        # add zero padding so FFT is fast
        fshape = [_next_regular(int(d)) for d in shape]

        signal_fft = numpy.fft.rfftn(signal_block, fshape)
        kernel_fft = numpy.fft.rfftn(kernel_block, fshape)

        # this variable determines the output slice that doesn't include
        # the padded array region made for fast FFTs.
        fslice = tuple([slice(0, int(sz)) for sz in shape])

        # classic FFT convolution
        result = numpy.fft.irfftn(signal_fft * kernel_fft, fshape)[fslice]

        # nix any roundoff error
        if set_tol_to_zero is not None:
            result[numpy.isclose(result, set_tol_to_zero)] = 0.0

        # if we're ignoring nodata, we need to make a convolution of the
        # nodata mask too
        if ignore_nodata:
            mask_fft = numpy.fft.rfftn(
                numpy.where(signal_nodata_mask, 0.0, 1.0), fshape)
            mask_result = numpy.fft.irfftn(
                mask_fft * kernel_fft, fshape)[fslice]

        left_index_result = 0
        right_index_result = result.shape[1]
        top_index_result = 0
        bottom_index_result = result.shape[0]

        # we might abut the edge of the raster, clip if so
        if left_index_raster < 0:
            left_index_result = -left_index_raster
            left_index_raster = 0
        if top_index_raster < 0:
            top_index_result = -top_index_raster
            top_index_raster = 0
        if right_index_raster > n_cols_signal:
            right_index_result -= right_index_raster - n_cols_signal
            right_index_raster = n_cols_signal
        if bottom_index_raster > n_rows_signal:
            bottom_index_result -= (
                bottom_index_raster - n_rows_signal)
            bottom_index_raster = n_rows_signal

        # Add result to current output to account for overlapping edges
        index_dict = {
            'xoff': left_index_raster,
            'yoff': top_index_raster,
            'win_xsize': right_index_raster-left_index_raster,
            'win_ysize': bottom_index_raster-top_index_raster
        }

        write_queue.put(
            (index_dict, result, mask_result,
             left_index_raster, right_index_raster, top_index_raster,
             bottom_index_raster, left_index_result, right_index_result,
             top_index_result, bottom_index_result))

    # Indicates worker has terminated
    write_queue.put(None)


def _assert_is_valid_pixel_size(target_pixel_size):
    """Return true if ``target_pixel_size`` is a valid 2 element sequence.

    Raises ValueError if not a two element list/tuple and/or the values in
    the sequence are not numerical.

    """
    def _is_number(x):
        """Return true if x is a number."""
        try:
            if isinstance(x, str):
                return False
            float(x)
            return True
        except (ValueError, TypeError):
            return False

    if not isinstance(target_pixel_size, (list, tuple)):
        # note: ValueError takes a formatted message, not logging-style args
        raise ValueError(
            "target_pixel_size is not a tuple, its value was '%s'" %
            repr(target_pixel_size))

    if (len(target_pixel_size) != 2 or
            not all([_is_number(x) for x in target_pixel_size])):
        raise ValueError(
            "Invalid value for `target_pixel_size`, expected two numerical "
            "elements, got: %s" % repr(target_pixel_size))
    return True
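The core of the worker is a standard zero-padded FFT convolution. The
standalone sketch below reproduces that step with plain numpy (skipping the
``_next_regular`` padding optimization the worker uses) and checks it
against ``scipy.signal.fftconvolve``:

import numpy
import scipy.signal

signal_block = numpy.random.random((64, 64))
kernel_block = numpy.random.random((9, 9))

# full linear-convolution output shape
shape = (
    numpy.array(signal_block.shape) + numpy.array(kernel_block.shape) - 1)
fshape = [int(d) for d in shape]

# pad both inputs to the same shape, multiply spectra, invert, then slice
signal_fft = numpy.fft.rfftn(signal_block, fshape)
kernel_fft = numpy.fft.rfftn(kernel_block, fshape)
fslice = tuple(slice(0, int(sz)) for sz in shape)
result = numpy.fft.irfftn(signal_fft * kernel_fft, fshape)[fslice]

expected = scipy.signal.fftconvolve(signal_block, kernel_block, mode='full')
assert numpy.allclose(result, expected)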
def shapely_geometry_to_vector(
        shapely_geometry_list, target_vector_path, projection_wkt,
        vector_format, fields=None, attribute_list=None,
        ogr_geom_type=ogr.wkbPolygon):
    """Convert a list of Shapely geometry to a vector on disk.

    Args:
        shapely_geometry_list (list): a list of Shapely objects.
        target_vector_path (str): path to target vector.
        projection_wkt (str): WKT for target vector.
        vector_format (str): GDAL driver name for target vector.
        fields (dict): a python dictionary mapping string fieldname to OGR
            Fieldtypes; if None no fields are added.
        attribute_list (list of dicts): a list of python dictionaries mapping
            fieldname to field value for each geometry in
            ``shapely_geometry_list``; if None, no attributes are created.
        ogr_geom_type (ogr geometry enumerated type): sets the target layer
            geometry type. Defaults to wkbPolygon.

    Return:
        None

    """
    if fields is None:
        fields = {}
    if attribute_list is None:
        attribute_list = [{} for _ in range(len(shapely_geometry_list))]

    num_geoms = len(shapely_geometry_list)
    num_attrs = len(attribute_list)
    if num_geoms != num_attrs:
        raise ValueError(
            f"Geometry count ({num_geoms}) and attribute count "
            f"({num_attrs}) do not match.")

    vector_driver = ogr.GetDriverByName(vector_format)
    target_vector = vector_driver.CreateDataSource(target_vector_path)
    layer_name = os.path.basename(os.path.splitext(target_vector_path)[0])
    projection = osr.SpatialReference()
    projection.ImportFromWkt(projection_wkt)
    target_layer = target_vector.CreateLayer(
        layer_name, srs=projection, geom_type=ogr_geom_type)

    for field_name, field_type in fields.items():
        target_layer.CreateField(ogr.FieldDefn(field_name, field_type))
    layer_defn = target_layer.GetLayerDefn()

    for field_index, (shapely_feature, feature_attributes) in enumerate(
            zip(shapely_geometry_list, attribute_list)):
        new_feature = ogr.Feature(layer_defn)
        new_geometry = ogr.CreateGeometryFromWkb(shapely_feature.wkb)
        new_feature.SetGeometry(new_geometry)

        if set(fields.keys()) != set(feature_attributes.keys()):
            raise ValueError(
                f"The fields and attributes for feature {field_index} "
                f"do not match. Field definitions={fields}. "
                f"Feature attributes={feature_attributes}.")
        for field_name, field_value in feature_attributes.items():
            new_feature.SetField(field_name, field_value)
        target_layer.CreateFeature(new_feature)

    target_layer = None
    target_vector = None
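A minimal usage sketch; the output path, EPSG code, and field values are
illustrative:

import shapely.geometry
from osgeo import ogr, osr

import pygeoprocessing

srs = osr.SpatialReference()
srs.ImportFromEPSG(32630)  # assumption: any projected CRS works here

# write two box polygons with an integer 'id' field to a GeoPackage
pygeoprocessing.shapely_geometry_to_vector(
    [shapely.geometry.box(0, 0, 10, 10),
     shapely.geometry.box(20, 20, 30, 30)],
    'boxes.gpkg', srs.ExportToWkt(), 'GPKG',
    fields={'id': ogr.OFTInteger},
    attribute_list=[{'id': 1}, {'id': 2}])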
def numpy_array_to_raster(
        base_array, target_nodata, pixel_size, origin, projection_wkt,
        target_path,
        raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
    """Create a single band raster of size ``base_array.shape``.

    Args:
        base_array (numpy.array): a 2d numpy array.
        target_nodata (numeric): nodata value of target array, can be None.
        pixel_size (tuple): square dimensions (in ``(x, y)``) of pixel.
        origin (tuple/list): x/y coordinate of the raster origin.
        projection_wkt (str): target projection in wkt.
        target_path (str): path to raster to create that will be of the same
            type of base_array with contents of base_array.
        raster_driver_creation_tuple (tuple): a tuple containing a GDAL
            driver name string as the first element and a GDAL creation
            options tuple/list as the second. Defaults to
            geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS.

    Return:
        None

    """
    numpy_to_gdal_type = {
        numpy.dtype(bool): gdal.GDT_Byte,
        numpy.dtype(numpy.int8): gdal.GDT_Byte,
        numpy.dtype(numpy.uint8): gdal.GDT_Byte,
        numpy.dtype(numpy.int16): gdal.GDT_Int16,
        numpy.dtype(numpy.int32): gdal.GDT_Int32,
        numpy.dtype(numpy.uint16): gdal.GDT_UInt16,
        numpy.dtype(numpy.uint32): gdal.GDT_UInt32,
        numpy.dtype(numpy.float32): gdal.GDT_Float32,
        numpy.dtype(numpy.float64): gdal.GDT_Float64,
        # numpy.csingle is an alias of numpy.complex64, so the single
        # precision complex type maps to CFloat32 while the double
        # precision numpy.complex128 maps to CFloat64
        numpy.dtype(numpy.complex64): gdal.GDT_CFloat32,
        numpy.dtype(numpy.complex128): gdal.GDT_CFloat64,
    }
    raster_driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])
    ny, nx = base_array.shape
    new_raster = raster_driver.Create(
        target_path, nx, ny, 1, numpy_to_gdal_type[base_array.dtype],
        options=raster_driver_creation_tuple[1])
    if projection_wkt is not None:
        new_raster.SetProjection(projection_wkt)
    new_raster.SetGeoTransform(
        [origin[0], pixel_size[0], 0.0, origin[1], 0.0, pixel_size[1]])
    new_band = new_raster.GetRasterBand(1)
    if target_nodata is not None:
        new_band.SetNoDataValue(target_nodata)
    new_band.WriteArray(base_array)
    new_band = None
    new_raster = None
def raster_to_numpy_array(raster_path, band_id=1):
    """Read the entire contents of the raster band to a numpy array.

    Args:
        raster_path (str): path to raster.
        band_id (int): band in the raster to read.

    Return:
        numpy array contents of ``band_id`` in raster.

    """
    raster = gdal.OpenEx(raster_path, gdal.OF_RASTER)
    band = raster.GetRasterBand(band_id)
    array = band.ReadAsArray()
    band = None
    raster = None
    return array
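A roundtrip sketch pairing ``numpy_array_to_raster`` with
``raster_to_numpy_array``; the path, pixel size, origin, and EPSG code are
illustrative:

import numpy
from osgeo import osr

import pygeoprocessing

srs = osr.SpatialReference()
srs.ImportFromEPSG(32630)  # assumption: any projected CRS works here

array = numpy.linspace(0, 1, 100, dtype=numpy.float32).reshape((10, 10))
# 30 m square pixels, negative y for a north-up raster
pygeoprocessing.numpy_array_to_raster(
    array, -1.0, (30.0, -30.0), (444720.0, 3751320.0),
    srs.ExportToWkt(), 'example.tif')

roundtrip = pygeoprocessing.raster_to_numpy_array('example.tif')
assert numpy.array_equal(array, roundtrip)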
def stitch_rasters(
        base_raster_path_band_list, resample_method_list,
        target_stitch_raster_path_band, overlap_algorithm='etch',
        area_weight_m2_to_wgs84=False,
        osr_axis_mapping_strategy=DEFAULT_OSR_AXIS_MAPPING_STRATEGY):
    """Stitch the rasters in the base list into the existing target.

    Args:
        base_raster_path_band_list (sequence): sequence of raster path/band
            tuples to stitch into target.
        resample_method_list (sequence): a sequence of resampling methods
            that map one-to-one to each path in
            ``base_raster_path_band_list`` during resizing. Each element
            must be one of "near|bilinear|cubic|cubicspline|lanczos|mode".
        target_stitch_raster_path_band (tuple): raster path/band tuple to an
            existing raster; values in ``base_raster_path_band_list`` will
            be stitched into this raster/band in the order they appear in
            the list. The nodata value for the target band must be defined
            and will be written over with values from the base rasters.
            Nodata values in the base rasters will not be written into the
            target. If the pixel size or projection differ between base and
            target, the base is warped to the target's cell size and
            projection using the corresponding resample method. If any part
            of the base raster lies outside of the target, that part of the
            base is ignored. A warning is logged if the entire base raster
            is outside of the target bounds.
        overlap_algorithm (str): this value indicates which algorithm to use
            when a raster is stitched on non-nodata values in the target
            stitch raster. It can be one of the following:

            'etch': write a value to the target raster only if the target
                raster pixel is nodata. If the target pixel is non-nodata
                ignore any additional values to write on that pixel.
            'replace': write a value to the target raster irrespective of
                the value of the target raster.
            'add': add the value to be written to the target raster to any
                existing value that is there. If the existing value is
                nodata, treat it as 0.0.
        area_weight_m2_to_wgs84 (bool): If ``True`` the stitched raster will
            be converted to a per-area value before reprojection to wgs84,
            then multiplied by the m^2 area per pixel in the wgs84
            coordinate space. This is useful when the quantity being
            stitched is a total quantity per pixel rather than a per unit
            area density. Note this assumes input rasters are in a projected
            space of meters; if they are not, the stitched output will be
            nonsensical.
        osr_axis_mapping_strategy (int): OSR axis mapping strategy for
            ``SpatialReference`` objects. Defaults to
            ``geoprocessing.DEFAULT_OSR_AXIS_MAPPING_STRATEGY``. This
            parameter should not be changed unless you know what you are
            doing.

    Return:
        None.
""" valid_overlap_algorithms = ['etch', 'replace', 'add'] if overlap_algorithm not in valid_overlap_algorithms: raise ValueError( f'overlap algorithm {overlap_algorithm} is not one of ' f'{valid_overlap_algorithms}') if not _is_raster_path_band_formatted(target_stitch_raster_path_band): raise ValueError( f'Expected raster path/band tuple for ' f'target_stitch_raster_path_band but got ' f'"{target_stitch_raster_path_band}"') if len(base_raster_path_band_list) != len(resample_method_list): raise ValueError( f'Expected same number of elements in ' f'`base_raster_path_band_list` as `resample_method_list` but ' f'got {len(base_raster_path_band_list)} != ' f'{len(resample_method_list)} respectively') if not os.path.exists(target_stitch_raster_path_band[0]): raise ValueError( f'Target stitch raster does not exist: ' f'"{target_stitch_raster_path_band[0]}"') gis_type = get_gis_type(target_stitch_raster_path_band[0]) from pygeoprocessing import RASTER_TYPE if gis_type != RASTER_TYPE: raise ValueError( f'Target stitch raster is not a raster. ' f'Location: "{target_stitch_raster_path_band[0]}" ' f'GIS type: {gis_type}') target_raster_info = get_raster_info(target_stitch_raster_path_band[0]) if target_stitch_raster_path_band[1] > len(target_raster_info['nodata']): raise ValueError( f'target_stitch_raster_path_band refers to a band that exceeds ' f'the number of bands in the raster:\n' f'target_stitch_raster_path_band[1]: ' f'{target_stitch_raster_path_band[1]} ' f'n bands: {len(target_raster_info["nodata"])}') target_nodata = target_raster_info['nodata'][ target_stitch_raster_path_band[1]-1] if target_nodata is None: raise ValueError( f'target stitch raster at "{target_stitch_raster_path_band[0]} "' f'nodata value is `None`, expected non-`None` value') target_raster = gdal.OpenEx( target_stitch_raster_path_band[0], gdal.OF_RASTER | gdal.GA_Update) target_band = target_raster.GetRasterBand( target_stitch_raster_path_band[1]) target_inv_gt = gdal.InvGeoTransform(target_raster_info['geotransform']) target_raster_x_size, target_raster_y_size = target_raster_info[ 'raster_size'] for (raster_path, raster_band_id), resample_method in zip( base_raster_path_band_list, resample_method_list): LOGGER.info( f'stitching {(raster_path, raster_band_id)} into ' f'{target_stitch_raster_path_band}') raster_info = get_raster_info(raster_path) projected_raster_bounding_box = transform_bounding_box( raster_info['bounding_box'], raster_info['projection_wkt'], target_raster_info['projection_wkt']) try: # merge the bounding boxes only to see if they don't intersect _ = merge_bounding_box_list( [projected_raster_bounding_box, target_raster_info['bounding_box']], 'intersection') except ValueError: LOGGER.warning( f'the raster at "{raster_path}"" does not intersect the ' f'stitch raster at "{target_stitch_raster_path_band[0]}", ' f'skipping...') continue # use this to determine if we need to warp and delete if we did at # the end if (raster_info['projection_wkt'] == target_raster_info['projection_wkt'] and raster_info['pixel_size'] == target_raster_info['pixel_size']): warped_raster = False base_stitch_raster_path = raster_path else: workspace_dir = tempfile.mkdtemp( dir=os.path.dirname(target_stitch_raster_path_band[0]), prefix='stitch_rasters_workspace') base_stitch_raster_path = os.path.join( workspace_dir, os.path.basename(raster_path)) warp_raster( raster_path, target_raster_info['pixel_size'], base_stitch_raster_path, resample_method, target_projection_wkt=target_raster_info['projection_wkt'], working_dir=workspace_dir, 
                osr_axis_mapping_strategy=osr_axis_mapping_strategy)
            warped_raster = True

        if warped_raster and area_weight_m2_to_wgs84:
            # determine base area per pixel currently and area per pixel
            # once it is projected to wgs84 pixel sizes
            base_pixel_area_m2 = abs(numpy.prod(raster_info['pixel_size']))
            base_stitch_raster_info = get_raster_info(
                base_stitch_raster_path)
            _, lat_min, _, lat_max = base_stitch_raster_info['bounding_box']
            n_rows = base_stitch_raster_info['raster_size'][1]
            # this column is a longitude-invariant, latitude-variant pixel
            # area for scaling area dependent values
            m2_area_per_lat = _create_latitude_m2_area_column(
                lat_min, lat_max, n_rows)

            def _mult_op(base_array, base_nodata, scale, datatype):
                """Scale non-nodata by scale."""
                result = base_array.astype(datatype)
                if base_nodata is not None:
                    valid_mask = ~numpy.isclose(base_array, base_nodata)
                else:
                    valid_mask = numpy.ones(
                        base_array.shape, dtype=bool)
                result[valid_mask] = result[valid_mask] * scale[valid_mask]
                return result

            base_stitch_nodata = base_stitch_raster_info['nodata'][0]
            scaled_raster_path = os.path.join(
                workspace_dir,
                f'scaled_{os.path.basename(base_stitch_raster_path)}')
            # multiply the pixels in the resampled raster by the ratio of
            # the pixel area in the wgs84 units divided by the area of the
            # original pixel
            raster_calculator(
                [(base_stitch_raster_path, 1), (base_stitch_nodata, 'raw'),
                 m2_area_per_lat/base_pixel_area_m2,
                 (_GDAL_TYPE_TO_NUMPY_LOOKUP[
                    target_raster_info['datatype']], 'raw')], _mult_op,
                scaled_raster_path,
                target_raster_info['datatype'], base_stitch_nodata)

            # swap the result to base stitch so the rest of the function
            # operates on the area scaled raster
            os.remove(base_stitch_raster_path)
            base_stitch_raster_path = scaled_raster_path

        base_raster = gdal.OpenEx(base_stitch_raster_path, gdal.OF_RASTER)
        base_gt = base_raster.GetGeoTransform()
        base_band = base_raster.GetRasterBand(raster_band_id)
        base_nodata = base_band.GetNoDataValue()
        # Get the base raster's upper left corner in the target raster's
        # pixel coordinates
        target_to_base_xoff, target_to_base_yoff = [
            int(_) for _ in gdal.ApplyGeoTransform(
                target_inv_gt, *gdal.ApplyGeoTransform(base_gt, 0, 0))]
        for offset_dict in iterblocks(
                (base_stitch_raster_path, raster_band_id), offset_only=True):
            _offset_vars = {}
            overlap = True
            for (target_to_base_off, off_val, target_off_id,
                 off_clip_id, win_size_id, raster_size) in [
                    (target_to_base_xoff, offset_dict['xoff'],
                     'target_xoff', 'xoff_clip', 'win_xsize',
                     target_raster_x_size),
                    (target_to_base_yoff, offset_dict['yoff'],
                     'target_yoff', 'yoff_clip', 'win_ysize',
                     target_raster_y_size)]:
                _offset_vars[target_off_id] = (target_to_base_off+off_val)
                if _offset_vars[target_off_id] >= raster_size:
                    overlap = False
                    break
                # how far to shift the window so it starts inside the
                # target raster
                _offset_vars[off_clip_id] = 0
                if _offset_vars[target_off_id] < 0:
                    _offset_vars[off_clip_id] = -_offset_vars[target_off_id]
                _offset_vars[win_size_id] = offset_dict[win_size_id]
                if _offset_vars[off_clip_id] >= _offset_vars[win_size_id]:
                    # the whole window lies outside the target raster
                    overlap = False
                    break
                # shrink _offset_vars[win_size_id] if the window runs off
                # the far edge of the target raster
                if (_offset_vars[off_clip_id] +
                        _offset_vars[target_off_id] +
                        _offset_vars[win_size_id] >= raster_size):
                    _offset_vars[win_size_id] -= (
                        _offset_vars[off_clip_id] +
                        _offset_vars[target_off_id] +
                        _offset_vars[win_size_id] - raster_size)

            if not overlap:
                continue

            target_array = target_band.ReadAsArray(
                xoff=_offset_vars['target_xoff']+_offset_vars['xoff_clip'],
                yoff=_offset_vars['target_yoff']+_offset_vars['yoff_clip'],
                win_xsize=_offset_vars['win_xsize'],
                win_ysize=_offset_vars['win_ysize'])
            target_nodata_mask = numpy.isclose(target_array, target_nodata)
            base_array = base_band.ReadAsArray(
                xoff=offset_dict['xoff']+_offset_vars['xoff_clip'],
                yoff=offset_dict['yoff']+_offset_vars['yoff_clip'],
                win_xsize=_offset_vars['win_xsize'],
                win_ysize=_offset_vars['win_ysize'])

            if base_nodata is not None:
                base_nodata_mask = numpy.isclose(base_array, base_nodata)
            else:
                base_nodata_mask = numpy.zeros(
                    base_array.shape, dtype=bool)

            if overlap_algorithm == 'etch':
                # place values only where target is nodata
                valid_mask = ~base_nodata_mask & target_nodata_mask
                target_array[valid_mask] = base_array[valid_mask]
            elif overlap_algorithm == 'replace':
                # write valid values into the target -- disregard any
                # existing values in the target
                valid_mask = ~base_nodata_mask
                target_array[valid_mask] = base_array[valid_mask]
            elif overlap_algorithm == 'add':
                # add values to the target and treat target nodata as 0.
                valid_mask = ~base_nodata_mask
                masked_target_array = target_array[valid_mask]
                target_array_nodata_mask = numpy.isclose(
                    masked_target_array, target_nodata)
                target_array[valid_mask] = (
                    base_array[valid_mask] + numpy.where(
                        target_array_nodata_mask, 0, masked_target_array))
            else:
                raise RuntimeError(
                    f'overlap_algorithm {overlap_algorithm} was not defined '
                    f'but also not detected earlier -- this should never '
                    f'happen')

            target_band.WriteArray(
                target_array,
                xoff=_offset_vars['target_xoff']+_offset_vars['xoff_clip'],
                yoff=_offset_vars['target_yoff']+_offset_vars['yoff_clip'])
        # release the band before the raster so GDAL flushes cleanly
        base_band = None
        base_raster = None
        if warped_raster:
            shutil.rmtree(workspace_dir)
    target_band = None
    target_raster = None
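A minimal usage sketch of ``stitch_rasters``; the tile and mosaic paths are
hypothetical, and the target mosaic is assumed to already exist with a
defined nodata value:

import pygeoprocessing

# stitch two tiles into an existing mosaic, summing where tiles overlap
# data already present in the target
pygeoprocessing.stitch_rasters(
    [('tile_north.tif', 1), ('tile_south.tif', 1)],
    ['near', 'near'],
    ('global_mosaic.tif', 1),
    overlap_algorithm='add')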
def _m2_area_of_wg84_pixel(pixel_size, center_lat):
    """Calculate m^2 area of a square WGS84 pixel.

    Adapted from: https://gis.stackexchange.com/a/127327/2397

    Args:
        pixel_size (float): length of side of a square pixel in degrees.
        center_lat (float): latitude of the center of the pixel. Note this
            value +/- half the ``pixel_size`` must not exceed 90/-90 degrees
            latitude or an invalid area will be calculated.

    Returns:
        Area of square pixel of side length ``pixel_size`` centered at
        ``center_lat`` in m^2.

    """
    a = 6378137  # meters
    b = 6356752.3142  # meters
    e = math.sqrt(1 - (b/a)**2)
    area_list = []
    for f in [center_lat+pixel_size/2, center_lat-pixel_size/2]:
        zm = 1 - e*math.sin(math.radians(f))
        zp = 1 + e*math.sin(math.radians(f))
        area_list.append(
            math.pi * b**2 * (
                math.log(zp/zm) / (2*e) +
                math.sin(math.radians(f)) / (zp*zm)))
    return abs(pixel_size / 360. * (area_list[0] - area_list[1]))


def _create_latitude_m2_area_column(lat_min, lat_max, n_pixels):
    """Create an (n, 1) sized numpy array with m^2 areas in each element.

    Creates a per-pixel m^2 area array that varies with changes in latitude.
    This array can be used to scale values by area when converting to or
    from a WGS84 projection to a projected one.

    Args:
        lat_min (float): minimum latitude in the bound
        lat_max (float): maximum latitude in the bound
        n_pixels (int): number of pixels to create for the column. The size
            of the target square pixels is (lat_max-lat_min)/n_pixels
            degrees per side.

    Return:
        An (n, 1) sized numpy array whose elements are the m^2 areas
        estimated by the latitude value at the center of each pixel.

    """
    pixel_size = (lat_max - lat_min) / n_pixels
    center_lat_array = numpy.linspace(
        lat_min+pixel_size/2, lat_max-pixel_size/2, n_pixels)
    area_array = numpy.array([
        _m2_area_of_wg84_pixel(pixel_size, lat)
        for lat in reversed(center_lat_array)]).reshape((n_pixels, 1))
    return area_array
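A quick numeric sanity check of the area helper, run in this module's
context (values are approximate): a one-degree square pixel centered near
the equator covers roughly 111 km by 111 km, and the area falls off roughly
with the cosine of latitude:

# ~1.23e+10 m^2 for a 1-degree pixel spanning 0..1 degrees latitude
print(f'{_m2_area_of_wg84_pixel(1.0, 0.5):.3e}')
# ~6.2e+09 m^2 at 60 degrees latitude (about half the equatorial area)
print(f'{_m2_area_of_wg84_pixel(1.0, 60.0):.3e}')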