"""Manager for interacting with NLCD datasets."""
import os, sys
import logging
import xarray as xr
import rioxarray # needed to get rio, even though not used.
import geopandas as gpd
import shapely.geometry
import cftime
from typing import Tuple, List, Optional
import pygeohydro
import pygeohydro.helpers
from watershed_workflow.crs import CRS
from . import manager_dataset
# NLCD land cover legend, keyed by integer class code.  Each value is a
# (class name, (r, g, b)) pair; the color components are floats in [0, 1].
# Codes 0 and 127 both carry the name 'None' and a white color.
# NOTE: 'Perrenial' (sic) is left as-is because the names are used as lookup
# keys in `indices` below; correcting the spelling would change those keys.
colors = {
    0: ('None', (1., 1., 1.)),
    11: ('Open Water', (0.27843137255, 0.41960784314, 0.62745098039)),
    12: ('Perrenial Ice/Snow', (0.81960784314, 0.86666666667, 0.97647058824)),
    21: ('Developed, Open Space', (0.86666666667, 0.78823529412, 0.78823529412)),
    22: ('Developed, Low Intensity', (0.84705882353, 0.57647058824, 0.50980392157)),
    23: ('Developed, Medium Intensity', (0.92941176471, 0.00000000000, 0.00000000000)),
    24: ('Developed, High Intensity', (0.66666666667, 0.00000000000, 0.00000000000)),
    31: ('Barren Land', (0.69803921569, 0.67843137255, 0.63921568628)),
    41: ('Deciduous Forest', (0.40784313726, 0.66666666667, 0.38823529412)),
    42: ('Evergreen Forest', (0.10980392157, 0.38823529412, 0.18823529412)),
    43: ('Mixed Forest', (0.70980392157, 0.78823529412, 0.55686274510)),
    51: ('Dwarf Scrub', (0.64705882353, 0.54901960784, 0.18823529412)),
    52: ('Shrub/Scrub', (0.80000000000, 0.72941176471, 0.48627450980)),
    71: ('Grassland/Herbaceous', (0.88627450980, 0.88627450980, 0.75686274510)),
    72: ('Sedge/Herbaceous', (0.78823529412, 0.78823529412, 0.46666666667)),
    73: ('Lichens', (0.60000000000, 0.75686274510, 0.27843137255)),
    74: ('Moss', (0.46666666667, 0.67843137255, 0.57647058824)),
    81: ('Pasture/Hay', (0.85882352941, 0.84705882353, 0.23921568628)),
    82: ('Cultivated Crops', (0.66666666667, 0.43921568628, 0.15686274510)),
    90: ('Woody Wetlands', (0.72941176471, 0.84705882353, 0.91764705882)),
    95: ('Emergent Herbaceous Wetlands', (0.43921568628, 0.63921568628, 0.72941176471)),
    127: ('None', (1., 1., 1.)),
}

# Reverse lookup: class name -> class code.  Later entries overwrite earlier
# ones, so the duplicated name 'None' maps to 127 rather than 0.
indices = {name: code for code, (name, _rgb) in colors.items()}
[docs]
class ManagerNLCD(manager_dataset.ManagerDataset):
    """National Land Cover Database manager for single-year snapshots.

    Supports variables: cover, impervious, canopy, descriptor.
    Each manager instance represents a single year of NLCD data.

    Parameters
    ----------
    location : str, optional
        Location code ('L48', 'AK', 'HI', 'PR'). Default 'L48'.
    year : int, optional
        NLCD data year. If None, uses most recent available for location.

    .. [NLCD] https://www.mrlc.gov/
    """
    # Expose the module-level legend tables as class attributes.
    colors = colors
    indices = indices

    # Years accepted for each location code, listed newest first because
    # _validateYear returns the first entry when no year is requested.
    # The keys are also the set of accepted location codes, so the two
    # validators below cannot drift apart.
    _VALID_YEARS = {
        'L48': [2021, 2019, 2016, 2013, 2011, 2008, 2006, 2004, 2001],
        'AK': [2016, 2011, 2001],
        'HI': [2001],
        'PR': [2001],
    }

    def __init__(self, location: str = 'L48', year: Optional[int] = None) -> None:
        """Initialize NLCD manager for specific location and year.

        Parameters
        ----------
        location : str, optional
            Location code ('L48', 'AK', 'HI', 'PR'). Default 'L48'.
        year : int, optional
            NLCD data year. If None, uses most recent available for location.

        Raises
        ------
        ValueError
            If `location` is not an accepted code, or `year` is not accepted
            for that location.
        """
        # Validate the location first; the year table is indexed by it.
        self.location = self._validateLocation(location)
        self.year = self._validateYear(year, self.location)

        # NLCD is non-temporal - each instance represents one year snapshot
        native_crs = CRS.from_epsg(4326)  # WGS84 Geographic
        super().__init__(
            name=f'NLCD {self.year} {self.location}',
            source='pygeohydro',
            native_resolution=0.00027,  # ~30m in degrees (approximately 30m at mid-latitudes)
            native_crs_in=native_crs,    # Expected input CRS
            native_crs_out=native_crs,   # Output data CRS
            native_start=None,           # Non-temporal data
            native_end=None,             # Non-temporal data
            valid_variables=['cover', 'impervious', 'canopy', 'descriptor'],
            default_variables=['cover']
        )

    def _validateLocation(self, location: str) -> str:
        """Return `location` unchanged if it is an accepted location code.

        Raises
        ------
        ValueError
            If `location` is not one of the accepted codes.
        """
        # A list (not the dict itself) keeps the membership test usable for
        # unhashable inputs and gives the error message its list formatting.
        valid_locations = list(self._VALID_YEARS)
        if location not in valid_locations:
            raise ValueError(f'NLCD invalid location "{location}", valid are: {valid_locations}')
        return location

    def _validateYear(self, year: Optional[int], location: str) -> int:
        """Return the year to use for `location`.

        Parameters
        ----------
        year : int or None
            Requested year; None selects the first (most recent) entry of
            the table for `location`.
        location : str
            Location code; must be a key of the year table.

        Raises
        ------
        ValueError
            If `year` is given but not accepted for `location`.
        """
        location_years = self._VALID_YEARS[location]

        if year is None:
            return location_years[0]  # Most recent

        if year not in location_years:
            raise ValueError(f'NLCD invalid year "{year}" for location {location}, '
                             f'valid are: {location_years}')
        return year

    def _requestDataset(self, request: manager_dataset.ManagerDataset.Request
                        ) -> manager_dataset.ManagerDataset.Request:
        """Request NLCD data - ready immediately.

        Parameters
        ----------
        request : ManagerDataset.Request
            Dataset request with preprocessed parameters.

        Returns
        -------
        ManagerDataset.Request
            The same request object, with ``is_ready`` set to True.
        """
        request.is_ready = True
        return request

    def _fetchDataset(self, request: manager_dataset.ManagerDataset.Request) -> xr.Dataset:
        """Fetch NLCD data for the request.

        Parameters
        ----------
        request : ManagerDataset.Request
            Dataset request with preprocessed parameters.

        Returns
        -------
        xr.Dataset
            Dataset with requested NLCD variables for the specified year,
            keyed by plain variable name, with ``nlcd_year`` and
            ``nlcd_location`` attributes set.

        Raises
        ------
        ValueError
            If the request carries no variables, or a requested variable is
            missing from the pygeohydro response.
        """
        geometry = request.geometry
        variables = request.variables

        # Explicit check rather than `assert`, so the validation is not
        # stripped when Python runs with -O.
        if variables is None:
            raise ValueError("Variables should not be None for multi-variable NLCD data")

        # Create GeoDataFrame with native CRS (geometry is already in native_crs_in)
        geom_df = gpd.GeoDataFrame(geometry=[geometry], crs=self.native_crs_in)

        # Build years dict for pygeohydro - single year for all variables
        years_dict = {var: [self.year] for var in variables}

        # Fetch data using pygeohydro
        data_dict = pygeohydro.nlcd_bygeom(
            geom_df,
            resolution=30,  # Use 30 meters (pygeohydro expects meters)
            years=years_dict,
            region=self.location,
        )

        # Extract the dataset (dict key is GeoDataFrame index, we have index 0)
        raw_dataset = data_dict[0]

        # Copy each variable over under its plain name (no year suffix)
        final_dataset = xr.Dataset()
        for var in variables:
            # pygeohydro returns variables as 'var_year'
            source_key = f'{var}_{self.year}'
            if source_key not in raw_dataset:
                raise ValueError(f"Variable {var} for year {self.year} not found in pygeohydro response")
            final_dataset[var] = raw_dataset[source_key]

        # Add metadata attributes
        final_dataset.attrs['nlcd_year'] = self.year
        final_dataset.attrs['nlcd_location'] = self.location

        return final_dataset