diff --git a/icepyx/core/cmr.py b/icepyx/core/cmr.py new file mode 100644 index 000000000..0c453c78b --- /dev/null +++ b/icepyx/core/cmr.py @@ -0,0 +1,3 @@ +from typing import Final + +CMR_PROVIDER: Final = "NSIDC_CPRD" diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index 5512120bb..119e60bb3 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -7,6 +7,7 @@ import pprint import re import time +from typing import Union from xml.etree import ElementTree as ET import zipfile @@ -16,6 +17,7 @@ import icepyx.core.APIformatting as apifmt from icepyx.core.auth import EarthdataAuthMixin +from icepyx.core.cmr import CMR_PROVIDER import icepyx.core.exceptions from icepyx.core.types import ( CMRParams, @@ -25,7 +27,7 @@ from icepyx.core.urls import DOWNLOAD_BASE_URL, GRANULE_SEARCH_BASE_URL, ORDER_BASE_URL -def info(grans): +def info(grans: list[dict]) -> dict[str, Union[int, float]]: """ Return some basic summary information about a set of granules for an query object. Granule info may be from a list of those available @@ -45,7 +47,14 @@ def info(grans): # DevNote: currently this fn is not tested # DevNote: could add flag to separate ascending and descending orbits based on ATL03 granule region -def gran_IDs(grans, ids=False, cycles=False, tracks=False, dates=False, cloud=False): +def gran_IDs( + grans: list[dict], + ids: bool = False, + cycles: bool = False, + tracks: bool = False, + dates: bool = False, + cloud: bool = False, +): """ Returns a list of granule information for each granule dictionary in the input list of granule dictionaries. @@ -54,17 +63,17 @@ def gran_IDs(grans, ids=False, cycles=False, tracks=False, dates=False, cloud=Fa Parameters ---------- - grans : list of dictionaries + grans : List of input granule json dictionaries. 
Must have key "producer_granule_id" - ids: boolean, default True + ids : Return a list of the available granule IDs for the granule dictionary - cycles : boolean, default False + cycles : Return a list of the available orbital cycles for the granule dictionary - tracks : boolean, default False + tracks : Return a list of the available Reference Ground Tracks (RGTs) for the granule dictionary - dates : boolean, default False + dates : Return a list of the available dates for the granule dictionary. - cloud : boolean, default False + cloud : Return a a list of AWS s3 urls for the available granules in the granule dictionary. """ assert len(grans) > 0, "Your data object has no granules associated with it" @@ -226,7 +235,7 @@ def get_avail( params = apifmt.combine_params( CMRparams, {k: reqparams[k] for k in ["short_name", "version", "page_size"]}, - {"provider": "NSIDC_CPRD"}, + {"provider": CMR_PROVIDER}, ) cmr_search_after = None diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index 95df81971..ac080dd4f 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -1,4 +1,5 @@ import json +from typing import Optional import warnings from xml.etree import ElementTree as ET @@ -20,38 +21,38 @@ def _validate_product(product): "A valid product string was not provided. " "Check user input, if given, or file metadata." 
) - if isinstance(product, str): - product = str.upper(product) - assert product in [ - "ATL01", - "ATL02", - "ATL03", - "ATL04", - "ATL06", - "ATL07", - "ATL07QL", - "ATL08", - "ATL09", - "ATL09QL", - "ATL10", - "ATL11", - "ATL12", - "ATL13", - "ATL14", - "ATL15", - "ATL16", - "ATL17", - "ATL19", - "ATL20", - "ATL21", - "ATL23", - ], error_msg - else: + if not isinstance(product, str): raise TypeError(error_msg) + + product = str.upper(product) + assert product in [ + "ATL01", + "ATL02", + "ATL03", + "ATL04", + "ATL06", + "ATL07", + "ATL07QL", + "ATL08", + "ATL09", + "ATL09QL", + "ATL10", + "ATL11", + "ATL12", + "ATL13", + "ATL14", + "ATL15", + "ATL16", + "ATL17", + "ATL19", + "ATL20", + "ATL21", + "ATL23", + ], error_msg return product -def _validate_OA_product(product): +def _validate_OA_product(product) -> str: """ Confirm a valid ICESat-2 product was specified """ @@ -74,7 +75,7 @@ def _validate_OA_product(product): # DevNote: test for this function is commented out; dates in some of the values were causing the test to fail... -def about_product(prod): +def about_product(prod: str) -> dict: """ Ping Earthdata to get metadata about the product of interest (the collection). @@ -170,7 +171,7 @@ def get_varlist(elem): # DevGoal: populate this with default variable lists for all of the products! # DevGoal: add a test for this function (to make sure it returns the right list, but also to deal with product not being in the list, though it should since it was checked as valid earlier...) -def _default_varlists(product): +def _default_varlists(product) -> list[str]: """ Return a list of default variables to select and send to the NSIDC subsetter. 
""" @@ -276,7 +277,7 @@ def _default_varlists(product): # a faster version using pandas map (instead of apply) is available in SlideRule: # https://github.com/SlideRuleEarth/sliderule/issues/388 # https://github.com/SlideRuleEarth/sliderule/commit/46cceac0e5f6d0a580933d399a6239bc911757f3 -def gt2spot(gt, sc_orient): +def gt2spot(gt, sc_orient) -> np.uint8: warnings.warn( "icepyx versions 0.8.0 and earlier used an incorrect spot number calculation." "As a result, computations depending on spot number may be incorrect and should be redone." @@ -295,6 +296,7 @@ def gt2spot(gt, sc_orient): gr_lr = gt[3] # spacecraft oriented forward + spot: Optional[int] = None if sc_orient == 1: if gr_num == 1: if gr_lr == "l": @@ -330,13 +332,13 @@ def gt2spot(gt, sc_orient): elif gr_lr == "r": spot = 6 - if "spot" not in locals(): + if spot is None: raise ValueError("Could not compute the spot number.") return np.uint8(spot) -def latest_version(product): +def latest_version(product) -> str: """ Determine the most recent version available for the given product. @@ -350,7 +352,7 @@ def latest_version(product): return max([entry["version_id"] for entry in _about_product["feed"]["entry"]]) -def extract_product(filepath, auth=None): +def extract_product(filepath, auth=None) -> str: """ Read the product type from the metadata of the file. Valid for local or s3 files, but must provide an auth object if reading from s3. Return the product as a string. @@ -396,7 +398,7 @@ def extract_product(filepath, auth=None): return product -def extract_version(filepath, auth=None): +def extract_version(filepath, auth=None) -> str: """ Read the version from the metadata of the file. Valid for local or s3 files, but must provide an auth object if reading from s3. Return the version as a string. 
@@ -423,23 +425,28 @@ f = h5py.File(filepath, "r") # Read the version information + version_str: str try: - version = f["METADATA"]["DatasetIdentification"].attrs["VersionID"] + version = f["METADATA"]["DatasetIdentification"].attrs["VersionID"] # pyright: ignore[reportIndexIssue] if isinstance(version, np.ndarray): # ATL14 stores the version as an array ['00x'] - version = version[0] - if isinstance(version, bytes): - version = version.decode() - - except KeyError as e: + version_str = version[0].decode() if isinstance(version[0], bytes) else version[0] + elif isinstance(version, bytes): + version_str = version.decode() + else: + version_str = str(version) + except (KeyError, TypeError) as e: raise Exception( "Unable to parse the version from file metadata" ).with_traceback(e.__traceback__) + finally: + # Close the file reader + f.close() # catch cases where the version number is an invalid string # e.g. a VersionID of "SET_BY_PGE", causing issues where version needs to be a valid number try: - float(version) + float(version) # pyright: ignore[reportArgumentType] except ValueError: raise Exception( "There is an underlying issue with the version information" @@ -447,6 +454,4 @@ "Consider setting the version manually for further processing."
) - # Close the file reader - f.close() - return version + return version_str diff --git a/icepyx/core/query.py b/icepyx/core/query.py index f34acda37..4d0d3015f 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1,8 +1,10 @@ import datetime as dt +from functools import cached_property import pprint from typing import Optional, Union, cast import geopandas as gpd +import holoviews as hv import matplotlib.pyplot as plt from typing_extensions import Never @@ -148,14 +150,13 @@ def __init__( if date_range: self._temporal = tp.Temporal(date_range, start_time, end_time) - def __str__(self): - str = "Extent type: {0} \nCoordinates: {1}\nDate range: ({2}, {3})".format( + def __str__(self) -> str: + return "Extent type: {0} \nCoordinates: {1}\nDate range: ({2}, {3})".format( self._spatial._ext_type, self._spatial._spatial_ext, self._temporal._start, self._temporal._end, ) - return str # ---------------------------------------------------------------------- # Properties @@ -183,11 +184,11 @@ def temporal(self) -> Union[tp.Temporal, list[str]]: ['No temporal parameters set'] """ - if hasattr(self, "_temporal"): - return self._temporal - else: + if not hasattr(self, "_temporal"): return ["No temporal parameters set"] + return self._temporal + @property def spatial(self) -> spat.Spatial: """ @@ -276,11 +277,11 @@ def dates(self) -> list[str]: """ if not hasattr(self, "_temporal"): return ["No temporal parameters set"] - else: - return [ - self._temporal._start.strftime("%Y-%m-%d"), - self._temporal._end.strftime("%Y-%m-%d"), - ] # could also use self._start.date() + + return [ + self._temporal._start.strftime("%Y-%m-%d"), + self._temporal._end.strftime("%Y-%m-%d"), + ] # could also use self._start.date() @property def start_time(self) -> Union[list[str], str]: @@ -303,8 +304,8 @@ def start_time(self) -> Union[list[str], str]: """ if not hasattr(self, "_temporal"): return ["No temporal parameters set"] - else: - return self._temporal._start.strftime("%H:%M:%S") + 
+ return self._temporal._start.strftime("%H:%M:%S") @property def end_time(self) -> Union[list[str], str]: @@ -327,8 +328,8 @@ def end_time(self) -> Union[list[str], str]: """ if not hasattr(self, "_temporal"): return ["No temporal parameters set"] - else: - return self._temporal._end.strftime("%H:%M:%S") + + return self._temporal._end.strftime("%H:%M:%S") # DevGoal: update docs throughout to allow for polygon spatial extent @@ -458,14 +459,13 @@ def __init__( # ---------------------------------------------------------------------- # Properties - def __str__(self): - str = "Product {2} v{3}\n{0}\nDate range {1}".format( + def __str__(self) -> str: + return "Product {2} v{3}\n{0}\nDate range {1}".format( self.spatial_extent, self.dates, self.product, self.product_version ) - return str @property - def dataset(self): + def dataset(self) -> Never: """ Legacy property included to provide deprecation warning. @@ -473,12 +473,12 @@ def dataset(self): -------- product """ - DeprecationError( + raise DeprecationError( "In line with most common usage, 'dataset' has been replaced by 'product'.", ) @property - def product(self): + def product(self) -> str: """ Return the short name product ID string associated with the query object. @@ -491,7 +491,7 @@ def product(self): return self._prod @property - def product_version(self): + def product_version(self) -> str: """ Return the product version of the data object. @@ -508,7 +508,7 @@ def product_version(self): return self._version @property - def cycles(self): + def cycles(self) -> list[str]: """ Return the unique ICESat-2 orbital cycle. 
@@ -528,7 +528,7 @@ def cycles(self): return sorted(set(self._cycles)) @property - def tracks(self): + def tracks(self) -> list[str]: """ Return the unique ICESat-2 Reference Ground Tracks @@ -694,7 +694,7 @@ def subsetparams(self, **kwargs) -> Union[EGIParamsSubset, dict[Never, Never]]: # DevGoal: add to tests # DevGoal: add statements to the following vars properties to let the user know if they've got a mismatched source and vars type @property - def order_vars(self): + def order_vars(self) -> Variables: """ Return the order variables object. This instance is generated when data is ordered from the NSIDC. @@ -736,13 +736,13 @@ def order_vars(self): return self._order_vars - @property - def granules(self): + @cached_property + def granules(self) -> Granules: """ - Return the granules object, which provides the underlying functionality for searching, ordering, - and downloading granules for the specified product. - Users are encouraged to use the built-in wrappers - rather than trying to access the granules object themselves. + Return the granules object, which provides the underlying functionality + for searching, ordering, and downloading granules for the specified + product. Users are encouraged to use the built-in wrappers rather than + trying to access the granules object themselves. 
See Also -------- avail_granules order_granules download_granules granules.Granules Examples -------- - >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) # doctest: +SKIP + >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) # doctest: +SKIP >>> reg_a.granules # doctest: +SKIP """ - - if not hasattr(self, "_granules") or self._granules is None: - self._granules = Granules() - - return self._granules + return Granules() # ---------------------------------------------------------------------- # Methods - Get and display neatly information at the product level - def product_summary_info(self): + def product_summary_info(self) -> None: """ Display a summary of selected metadata for the specified version of the product of interest (the collection). @@ -797,7 +793,7 @@ for key in summ_keys: print(key, ": ", self._about_product["feed"]["entry"][-1][key]) - def product_all_info(self): + def product_all_info(self) -> None: """ Display all metadata about the product of interest (the collection). @@ -812,7 +808,7 @@ self._about_product = is2ref.about_product(self._prod) pprint.pprint(self._about_product) - def latest_version(self): + def latest_version(self) -> str: """ A reference function to is2ref.latest_version. @@ -826,7 +822,7 @@ """ return is2ref.latest_version(self.product) - def show_custom_options(self, dictview=False): + def show_custom_options(self, dictview=False) -> None: """ Display customization/subsetting options available for this product.
@@ -902,7 +898,13 @@ def show_custom_options(self, dictview=False): # Methods - Granules (NSIDC-API) # DevGoal: check to make sure the see also bits of the docstrings work properly in RTD - def avail_granules(self, ids=False, cycles=False, tracks=False, cloud=False): + def avail_granules( + self, + ids: bool = False, + cycles: bool = False, + tracks: bool = False, + cloud: bool = False, + ) -> Union[list[list[str]], dict[str, Union[int, float]]]: """ Obtain information about the available granules for the query object's parameters. By default, a complete list of available granules is @@ -943,8 +945,6 @@ def avail_granules(self, ids=False, cycles=False, tracks=False, cloud=False): """ # REFACTOR: add test to make sure there's a session - if not hasattr(self, "_granules"): - self.granules try: self.granules.avail except AttributeError: @@ -965,7 +965,13 @@ def avail_granules(self, ids=False, cycles=False, tracks=False, cloud=False): # DevGoal: display output to indicate number of granules successfully ordered (and number of errors) # DevGoal: deal with subset=True for variables now, and make sure that if a variable subset # Coverage kwarg is input it's successfully passed through all other functions even if this is the only one run. - def order_granules(self, verbose=False, subset=True, email=False, **kwargs): + def order_granules( + self, + verbose: bool = False, + subset: bool = True, + email: bool = False, + **kwargs, + ) -> None: """ Place an order for the available granules for the query object. @@ -1033,8 +1039,6 @@ def order_granules(self, verbose=False, subset=True, email=False, **kwargs): # REFACTOR: add checks here to see if the granules object has been created, # and also if it already has a list of avail granules (if not, need to create one and add session) - if not hasattr(self, "_granules"): - self.granules # Place multiple orders, one per granule, if readable_granule_name is used. 
if "readable_granule_name[]" in self.CMRparams: @@ -1046,7 +1050,7 @@ def order_granules(self, verbose=False, subset=True, email=False, **kwargs): ) for gran in gran_name_list: tempCMRparams["readable_granule_name[]"] = gran - self._granules.place_order( + self.granules.place_order( tempCMRparams, cast(EGIRequiredParamsDownload, self.reqparams), self.subsetparams(**kwargs), @@ -1056,7 +1060,7 @@ def order_granules(self, verbose=False, subset=True, email=False, **kwargs): ) else: - self._granules.place_order( + self.granules.place_order( self.CMRparams, cast(EGIRequiredParamsDownload, self.reqparams), self.subsetparams(**kwargs), @@ -1067,8 +1071,13 @@ def order_granules(self, verbose=False, subset=True, email=False, **kwargs): # DevGoal: put back in the kwargs here so that people can just call download granules with subset=False! def download_granules( - self, path, verbose=False, subset=True, restart=False, **kwargs - ): # , extract=False): + self, + path: str, + verbose: bool = False, + subset: bool = True, + restart: bool = False, + **kwargs, + ) -> None: """ Downloads the data ordered using order_granules. @@ -1114,19 +1123,16 @@ def download_granules( # os.mkdir(path) # os.chdir(path) - if not hasattr(self, "_granules"): - self.granules - if restart is True: pass else: if ( - not hasattr(self._granules, "orderIDs") - or len(self._granules.orderIDs) == 0 + not hasattr(self.granules, "orderIDs") + or len(self.granules.orderIDs) == 0 ): self.order_granules(verbose=verbose, subset=subset, **kwargs) - self._granules.download(verbose, path, restart=restart) + self.granules.download(verbose, path, restart=restart) # DevGoal: add testing? What do we test, and how, given this is a visualization. # DevGoal(long term): modify this to accept additional inputs, etc. @@ -1134,7 +1140,7 @@ def download_granules( # DevGoal: see Amy's data access notebook for a zoomed in map - implement here? 
def visualize_spatial_extent( self, - ): # additional args, basemap, zoom level, cmap, export + ) -> None: # additional args, basemap, zoom level, cmap, export """ Creates a map displaying the input spatial extent @@ -1164,7 +1170,7 @@ def visualize_spatial_extent( gdf.plot(ax=ax, color="#FF8C00", alpha=0.7) plt.show() - def visualize_elevation(self): + def visualize_elevation(self) -> tuple[hv.DynamicMap, hv.Layout]: """ Visualize elevation requested from OpenAltimetry API using datashader based on cycles https://holoviz.org/tutorial/Large_Data.html diff --git a/icepyx/core/spatial.py b/icepyx/core/spatial.py index cd40fa58b..fef61846f 100644 --- a/icepyx/core/spatial.py +++ b/icepyx/core/spatial.py @@ -759,10 +759,10 @@ def fmt_for_CMR(self) -> str: """ # CMR keywords: ['bounding_box', 'polygon'] - if self._ext_type == "bounding_box": - cmr_extent = ",".join(map(str, self._spatial_ext)) + if self.extent_type == "bounding_box": + return ",".join(map(str, self._spatial_ext)) - elif self._ext_type == "polygon": + elif self.extent_type == "polygon": poly = self.extent_as_gdf.geometry if any( @@ -788,13 +788,11 @@ def fmt_for_CMR(self) -> str: neg_lons = [i if i < 181.0 else i - 360 for i in extent[0:-1:2]] extent = [item for pair in zip(neg_lons, extent[1::2]) for item in pair] - cmr_extent = ",".join(map(str, extent)) + return ",".join(map(str, extent)) else: raise icepyx.core.exceptions.ExhaustiveTypeGuardException - return cmr_extent - def fmt_for_EGI(self) -> str: """ Format the spatial extent input into a subsetting key value for submission to EGI (the NSIDC DAAC API). 
@@ -811,17 +809,15 @@ def fmt_for_EGI(self) -> str: """ # subsetting keywords: ['bbox','Boundingshape'] - these are set in APIformatting - if self._ext_type == "bounding_box": - egi_extent = ",".join(map(str, self._spatial_ext)) + if self.extent_type == "bounding_box": + return ",".join(map(str, self._spatial_ext)) # TODO: add handling for polygons that cross the dateline - elif self._ext_type == "polygon": + elif self.extent_type == "polygon": poly = self.extent_as_gdf.geometry[0] poly = orient(poly, sign=1.0) egi_extent = gpd.GeoSeries(poly).to_json() - egi_extent = egi_extent.replace(" ", "") # remove spaces for API call + return egi_extent.replace(" ", "") # remove spaces for API call else: raise icepyx.core.exceptions.ExhaustiveTypeGuardException - - return egi_extent diff --git a/pyproject.toml b/pyproject.toml index 932da57e9..386b79f7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -141,7 +141,6 @@ exclude = [ ignore = [ "icepyx/quest/*", "icepyx/core/auth.py", - "icepyx/core/is2ref.py", "icepyx/core/read.py", "icepyx/core/variables.py", "icepyx/core/visualization.py",