Source code for watershed_workflow.sources.manager_glhymps
"""Manager for interacting with GLHYMPS v2.0 dataset."""
import os, sys
import logging
import numpy as np
import pandas, geopandas
import shapely
from watershed_workflow.crs import CRS
from . import manager_shapefile
from . import filenames
# GLHYMPS cannot be fetched piecemeal through an API; the complete dataset
# must be downloaded by hand.  Maps the dataset name to its DOI URL.
urls = {
    'GLHYMPS version 2.0': 'https://doi.org/10.5683/SP2/TTJNIU',
}
[docs]
class ManagerGLHYMPS(manager_shapefile.ManagerShapefile):
    """The [GLHYMPS]_ global hydrogeology map provides global values of a
    two-layer (unconsolidated, consolidated) structure.

    .. note:: GLHYMPS does not have an API, and is a large (~4GB)
       download. Download the file from the below citation DOI and
       unzip the file into:

         <data_directory>/soil_structure/GLHYMPS/

       which should yield GLHYMPS.shp (amongst other files).

    .. [GLHYMPS] Huscroft, J.; Gleeson, T.; Hartmann, J.; Börker, J.,
       2018, "Compiling and mapping global permeability of the
       unconsolidated and consolidated Earth: GLobal HYdrogeology MaPS
       2.0 (GLHYMPS 2.0). [Supporting Data]",
       https://doi.org/10.5683/SP2/TTJNIU, Scholars Portal Dataverse,
       V1

    """

    def __init__(self, filename=None):
        """Set up the manager for the default dataset or a user-supplied file.

        Parameters
        ----------
        filename : str, optional
            Path to a GLHYMPS shapefile.  When omitted, the path is built
            by ``filenames.Names`` from the ``soil_structure/GLHYMPS``
            folder and the file name ``GLHYMPS.shp``.
        """
        if filename is not None:
            # Explicit file: there is no Names helper, which _download
            # later uses as the signal to skip the existence check.
            self.name = filename
            self.names = None
            shapefile = filename
        else:
            self.name = 'GLHYMPS version 2.0'
            self.names = filenames.Names(
                self.name,
                os.path.join('soil_structure', 'GLHYMPS'),
                '',
                'GLHYMPS.shp',
            )
            shapefile = self.names.file_name()

        super().__init__(shapefile, id_name='OBJECTID_1')

    def _download(self, force: bool = False):
        """Return the path of the GLHYMPS shapefile; nothing is downloaded.

        Parameters
        ----------
        force : bool, optional
            Never read by this method.

        Returns
        -------
        str
            ``self.name`` when the manager was built from an explicit
            filename, otherwise the path given by ``self.names``.

        Raises
        ------
        RuntimeError
            If the default-location file does not exist.
        """
        if self.names is None:
            # User-supplied file: handed back without checking existence.
            return self.name

        filename = self.names.file_name()
        logging.info(f' from file: {filename}')
        if os.path.exists(filename):
            return filename

        not_found = f'GLHYMPS download file {filename} not found.'
        logging.error(not_found)
        logging.error('See download instructions below\n\n')
        # The class docstring doubles as the manual download instructions.
        logging.error(self.__doc__)
        raise RuntimeError(not_found)

    def _getShapesByGeometry(self, geometry_gdf: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
        """Check the file is present, then defer to the parent geometry query.

        Parameters
        ----------
        geometry_gdf : geopandas.GeoDataFrame
            Geometries, in native_crs_in, used to search for shapes.

        Returns
        -------
        geopandas.GeoDataFrame
            Whatever the parent class implementation returns for
            ``geometry_gdf``.
        """
        # Raises RuntimeError before any read if the default file is missing.
        self._download()
        shapes = super()._getShapesByGeometry(geometry_gdf)
        return shapes

    def _getShapesByID(self, ids: list[str]) -> geopandas.GeoDataFrame:
        """Check the file is present, then defer to the parent ID query.

        Parameters
        ----------
        ids : list[str]
            IDs of the shapes to retrieve.

        Returns
        -------
        geopandas.GeoDataFrame
            Whatever the parent class implementation returns for ``ids``.
        """
        # Raises RuntimeError before any read if the default file is missing.
        self._download()
        shapes = super()._getShapesByID(ids)
        return shapes