"""Manager for interacting with NLCD datasets."""
import os, sys
import logging
import numpy as np
import shapely
import rasterio
import rasterio.mask
import watershed_workflow.sources.utils as source_utils
import watershed_workflow.config
import watershed_workflow.warp
import watershed_workflow.sources.names
colors = {
0: ('None', (0.00000000000, 0.00000000000, 0.00000000000)),
11: ('Open Water', (0.27843137255, 0.41960784314, 0.62745098039)),
12: ('Perrenial Ice/Snow', (0.81960784314, 0.86666666667, 0.97647058824)),
21: ('Developed, Open Space', (0.86666666667, 0.78823529412, 0.78823529412)),
22: ('Developed, Low Intensity', (0.84705882353, 0.57647058824, 0.50980392157)),
23: ('Developed, Medium Intensity', (0.92941176471, 0.00000000000, 0.00000000000)),
24: ('Developed, High Intensity', (0.66666666667, 0.00000000000, 0.00000000000)),
31: ('Barren Land', (0.69803921569, 0.67843137255, 0.63921568628)),
41: ('Deciduous Forest', (0.40784313726, 0.66666666667, 0.38823529412)),
42: ('Evergreen Forest', (0.10980392157, 0.38823529412, 0.18823529412)),
43: ('Mixed Forest', (0.70980392157, 0.78823529412, 0.55686274510)),
51: ('Dwarf Scrub', (0.64705882353, 0.54901960784, 0.18823529412)),
52: ('Shrub/Scrub', (0.80000000000, 0.72941176471, 0.48627450980)),
71: ('Grassland/Herbaceous', (0.88627450980, 0.88627450980, 0.75686274510)),
72: ('Sedge/Herbaceous', (0.78823529412, 0.78823529412, 0.46666666667)),
73: ('Lichens', (0.60000000000, 0.75686274510, 0.27843137255)),
74: ('Moss', (0.46666666667, 0.67843137255, 0.57647058824)),
81: ('Pasture/Hay', (0.85882352941, 0.84705882353, 0.23921568628)),
82: ('Cultivated Crops', (0.66666666667, 0.43921568628, 0.15686274510)),
90: ('Woody Wetlands', (0.72941176471, 0.84705882353, 0.91764705882)),
95: ('Emergent Herbaceous Wetlands', (0.43921568628, 0.63921568628, 0.72941176471)),
}
indices = dict([(pars[0], id) for (id, pars) in colors.items()])
[docs]
class FileManagerNLCD:
"""National Land Cover Database provides a raster for indexed land cover types
[NLCD]_.
.. note:: NLCD does not provide an API for subsetting the data, so the
first time this is used, it WILL result in a long download time as it
grabs the big file. After that it will be much faster as the file is
already local.
TODO: Labels and colors for these indices should get moved here, but
currently reside in watershed_workflow.colors.
Parameters
----------
layer : str, optional
Layer of interest. Default is `"land_cover`", should also be one for at
least imperviousness, maybe others?
year : int, optional
Year of dataset. Defaults to the most current available at the location.
location : str, optional
Location code. Default is `"L48`" (lower 48), valid include `"AK`"
(Alaska), `"HI`" (Hawaii, and `"PR`" (Puerto Rico).
.. [NLCD] https://www.mrlc.gov/
"""
colors = colors
indices = indices
url_pattern = 'https://s3-us-west-2.amazonaws.com/mrlc/nlcd_{YEAR}_{PRODUCT}_{LOCATION}_{VERSION}.zip'
def __init__(self, layer='Land_Cover', year=None, location='L48', version='20210604'):
self.layer, self.year, self.location = self.validate_input(layer, year, location)
self.version = version
self.layer_name = 'NLCD_{1}_{0}_{2}'.format(self.layer, self.year, self.location)
self.name = 'National Land Cover Database (NLCD) Layer: {}'.format(self.layer_name)
self.names = watershed_workflow.sources.names.Names(self.name, 'land_cover',
self.layer_name,
self.layer_name + '.img')
self.url = self.url_pattern.format(YEAR=self.year,
PRODUCT=self.layer.lower(),
LOCATION=self.location.lower(),
VERSION=self.version)
def validate_input(self, layer, year, location):
"""Validates input to the __init__ method."""
valid_layers = ['Land_Cover', 'Imperviousness']
if layer not in valid_layers:
raise ValueError('NLCD invalid layer "{}" requested, valid are: {}'.format(
layer, valid_layers))
valid_locations = ['L48', 'AK', 'HI', 'PR']
if location not in valid_locations:
raise ValueError('NLCD invalid location "{}" requested, valid are: {}'.format(
location, valid_locations))
valid_years = {
'L48': [2019, 2016, 2013, 2011, 2008, 2006, 2004, 2001],
'AK': [2011, 2001],
'HI': [2001, ],
'PR': [2001, ],
}
if year is None:
year = valid_years[location][0]
else:
if year not in valid_years[location]:
raise ValueError(
'NLCD invalid year "{}" requested for location {}, valid are: {}'.format(
year, location, valid_years[location]))
return layer, year, location
[docs]
def get_raster(self, shply, crs, force_download=False):
"""Download and read a DEM for this shape, clipping to the shape.
Parameters
----------
shply : fiona or shapely shape
Shape to provide bounds of the raster.
crs : CRS
CRS of the shape.
force_download : bool, optional
Download or re-download the file if true.
Returns
-------
profile : rasterio profile
Profile of the raster.
raster : np.ndarray
Array containing the elevation data.
Note that the raster provided is in NLCD native CRS (which is in the
rasterio profile), not the shape's CRS.
"""
# get shape as a shapely, single Polygon
if type(shply) is dict:
shply = watershed_workflow.utils.create_shply(shply['geometry'])
if type(shply) is shapely.geometry.MultiPolygon:
shply = shapely.ops.unary_union(shply)
# download (or hopefully don't) the file
filename, nlcd_profile = self._download()
logging.debug('CRS: {}'.format(nlcd_profile['crs']))
# warp to crs
shply = watershed_workflow.warp.shply(
shply, crs, watershed_workflow.crs.from_rasterio(nlcd_profile['crs']))
# load raster
with rasterio.open(filename, 'r') as fid:
profile = fid.profile
out_image, out_transform = rasterio.mask.mask(fid, [shply, ], crop=True, nodata=0)
profile.update({
"height": out_image.shape[1],
"width": out_image.shape[2],
"transform": out_transform,
"nodata": 0
})
assert (len(out_image.shape) == 3)
return profile, out_image[0, :, :]
def _download(self, force=False):
"""Download the files, returning list of filenames."""
# check directory structure
os.makedirs(self.names.data_dir(), exist_ok=True)
work_folder = self.names.raw_folder_name()
os.makedirs(work_folder, exist_ok=True)
filename = self.names.file_name()
logging.info(' filename: {}'.format(filename))
if not os.path.exists(filename) or force:
downloadfile = os.path.join(work_folder, self.url.split("/")[-1])
source_utils.download(self.url, downloadfile, force)
source_utils.unzip(downloadfile, work_folder)
# hope we can find it?
img_files = [f for f in os.listdir(work_folder) if f.endswith('.img')]
assert (len(img_files) == 1)
target = os.path.join(work_folder, img_files[0])
os.rename(target, filename)
for suffix in ['ige', 'rde', 'rrd', 'xml']:
os.rename(target[:-3] + suffix, filename[:-3] + suffix)
with rasterio.open(filename, 'r') as fid:
profile = fid.profile
return filename, profile