"""Manager for interacting with NLCD datasets."""
import os, sys
import logging
import xarray as xr
import rioxarray # needed to get rio, even though not used.
import geopandas as gpd
import shapely.geometry
import cftime
from typing import Tuple, List, Optional
import pygeohydro
import pygeohydro.helpers
from watershed_workflow.crs import CRS
from . import manager_dataset
# NLCD land-cover legend: class code -> (class name, color).
# Each color is a three-component tuple of floats in [0, 1] (presumably RGB).
# Codes 0 and 127 both carry the name 'None' with a white color.
# NOTE: the spelling 'Perrenial Ice/Snow' is kept as-is because the name is
# also a lookup key in `indices`; correcting it would break existing callers.
colors = {
    0: ('None', (1., 1., 1.)),
    11: ('Open Water', (0.27843137255, 0.41960784314, 0.62745098039)),
    12: ('Perrenial Ice/Snow', (0.81960784314, 0.86666666667, 0.97647058824)),
    21: ('Developed, Open Space', (0.86666666667, 0.78823529412, 0.78823529412)),
    22: ('Developed, Low Intensity', (0.84705882353, 0.57647058824, 0.50980392157)),
    23: ('Developed, Medium Intensity', (0.92941176471, 0.00000000000, 0.00000000000)),
    24: ('Developed, High Intensity', (0.66666666667, 0.00000000000, 0.00000000000)),
    31: ('Barren Land', (0.69803921569, 0.67843137255, 0.63921568628)),
    41: ('Deciduous Forest', (0.40784313726, 0.66666666667, 0.38823529412)),
    42: ('Evergreen Forest', (0.10980392157, 0.38823529412, 0.18823529412)),
    43: ('Mixed Forest', (0.70980392157, 0.78823529412, 0.55686274510)),
    51: ('Dwarf Scrub', (0.64705882353, 0.54901960784, 0.18823529412)),
    52: ('Shrub/Scrub', (0.80000000000, 0.72941176471, 0.48627450980)),
    71: ('Grassland/Herbaceous', (0.88627450980, 0.88627450980, 0.75686274510)),
    72: ('Sedge/Herbaceous', (0.78823529412, 0.78823529412, 0.46666666667)),
    73: ('Lichens', (0.60000000000, 0.75686274510, 0.27843137255)),
    74: ('Moss', (0.46666666667, 0.67843137255, 0.57647058824)),
    81: ('Pasture/Hay', (0.85882352941, 0.84705882353, 0.23921568628)),
    82: ('Cultivated Crops', (0.66666666667, 0.43921568628, 0.15686274510)),
    90: ('Woody Wetlands', (0.72941176471, 0.84705882353, 0.91764705882)),
    95: ('Emergent Herbaceous Wetlands', (0.43921568628, 0.63921568628, 0.72941176471)),
    127: ('None', (1., 1., 1.)),
}

# Reverse lookup: class name -> class code.  Built in insertion order, so for
# the duplicated name 'None' the later entry wins (indices['None'] == 127).
indices = {label: code for code, (label, _color) in colors.items()}
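# NOTE(review): a stray "[docs]" marker stood here; it looks like Sphinx viewcode extraction residue rather than module code -- confirm against the upstream file.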
class ManagerNLCD(manager_dataset.ManagerDataset):
    """Dataset manager for one National Land Cover Database (NLCD) snapshot.

    Every instance is bound to a single location and a single data year.
    The variables that may be requested are ``cover``, ``impervious``,
    ``canopy`` and ``descriptor``; ``cover`` is the default.

    Parameters
    ----------
    location : str, optional
      Location code, one of 'L48', 'AK', 'HI', 'PR'.  Default 'L48'.
    year : int, optional
      NLCD data year.  If None, the first (most recent) year listed for
      the location is used.

    .. [NLCD] https://www.mrlc.gov/

    """
    # Expose the module-level legend tables on the class as well.
    colors = colors
    indices = indices

    def __init__(self, location='L48', year=None):
        """Validate the location/year pair and configure the base manager.

        Parameters
        ----------
        location : str, optional
          Location code, one of 'L48', 'AK', 'HI', 'PR'.  Default 'L48'.
        year : int, optional
          NLCD data year.  If None, the first (most recent) year listed
          for the location is used.

        Raises
        ------
        ValueError
          If the location, or the year for that location, is not valid.
        """
        self.location = self._validateLocation(location)
        self.year = self._validateYear(year, location)

        # A snapshot has no time axis, so no start/end is given; the same
        # geographic CRS (EPSG:4326) is used for both input and output.
        wgs84 = CRS.from_epsg(4326)
        super().__init__(
            name=f'NLCD {self.year} {self.location}',
            source='pygeohydro',
            native_resolution=0.00027,  # degrees; roughly 30m at mid-latitudes
            native_crs_in=wgs84,
            native_crs_out=wgs84,
            native_start=None,
            native_end=None,
            valid_variables=['cover', 'impervious', 'canopy', 'descriptor'],
            default_variables=['cover'],
        )

    def _validateLocation(self, location):
        """Return ``location`` unchanged if it is a known code, else raise.

        Raises
        ------
        ValueError
          If ``location`` is not one of 'L48', 'AK', 'HI', 'PR'.
        """
        valid_locations = ['L48', 'AK', 'HI', 'PR']
        if location in valid_locations:
            return location
        raise ValueError(f'NLCD invalid location "{location}", valid are: {valid_locations}')

    def _validateYear(self, year, location):
        """Return the year to use for ``location``.

        When ``year`` is None the first entry of the location's year list
        (listed newest first) is returned; otherwise ``year`` must appear
        in that list.

        Raises
        ------
        ValueError
          If ``year`` is given but not listed for ``location``.
        """
        valid_years = {
            'L48': [2021, 2019, 2016, 2013, 2011, 2008, 2006, 2004, 2001],
            'AK': [2016, 2011, 2001],
            'HI': [2001],
            'PR': [2001],
        }
        available = valid_years[location]
        if year is None:
            return available[0]
        if year in available:
            return year
        raise ValueError(f'NLCD invalid year "{year}" for location {location}, '
                         f'valid are: {available}')

    def _requestDataset(self, request: manager_dataset.ManagerDataset.Request
                        ) -> manager_dataset.ManagerDataset.Request:
        """Flag the request as ready; nothing is done here beyond that.

        Parameters
        ----------
        request : ManagerDataset.Request
          Dataset request with preprocessed parameters.

        Returns
        -------
        ManagerDataset.Request
          The same request object, with ``is_ready`` set to True.
        """
        request.is_ready = True
        return request

    def _fetchDataset(self, request: manager_dataset.ManagerDataset.Request) -> xr.Dataset:
        """Download the requested NLCD variables for this manager's year.

        Parameters
        ----------
        request : ManagerDataset.Request
          Dataset request with preprocessed parameters; its ``geometry``
          and ``variables`` attributes are read.

        Returns
        -------
        xr.Dataset
          One data variable per requested name, plus ``nlcd_year`` and
          ``nlcd_location`` attributes.

        Raises
        ------
        ValueError
          If a requested variable is absent from the pygeohydro result.
        """
        geometry = request.geometry
        variables = request.variables
        assert variables is not None, "Variables should not be None for multi-variable NLCD data"

        # Wrap the single geometry in a frame tagged with the input CRS
        # (the geometry is already expressed in native_crs_in).
        query_frame = gpd.GeoDataFrame(geometry=[geometry], crs=self.native_crs_in)

        # Every variable is requested for the same, single year.
        years_by_variable = {name: [self.year] for name in variables}

        fetched = pygeohydro.nlcd_bygeom(
            query_frame,
            resolution=30,  # meters (pygeohydro expects meters)
            years=years_by_variable,
            region=self.location,
        )

        # Results are keyed by the frame's index; the only row has index 0.
        raw = fetched[0]

        # Copy each '<variable>_<year>' entry over under its plain name.
        result = xr.Dataset()
        for name in variables:
            source_key = f'{name}_{self.year}'
            if source_key not in raw:
                raise ValueError(f"Variable {name} for year {self.year} not found in pygeohydro response")
            result[name] = raw[source_key]

        result.attrs['nlcd_year'] = self.year
        result.attrs['nlcd_location'] = self.location
        return result