import re
from datetime import datetime
from typing import Any, Generator, Never
from zoneinfo import ZoneInfo
from pydantic import validate_call
from monkey_wrench.generic import ListSetTuple, PathLikeType, apply_to_single_or_collection
class DateTimeParserBase:
    """A static base class for parsing items, e.g. product IDs or file paths, into datetime objects."""

    @staticmethod
    def _raise_value_error(item: Any) -> Never:
        """Raise a ``ValueError`` reporting that the given item cannot be parsed.

        The exception is raised with ``from None``, i.e. the context of any exception that is currently being
        handled is suppressed.
        """
        raise ValueError(f"Could not parse {item} into a valid datetime object.") from None

    @staticmethod
    @validate_call
    def parse_by_regex(item: str, regex: str, timezone: ZoneInfo | None = None) -> datetime:
        r"""Parse the given item into a datetime object using a regular expression.

        Args:
            item:
                The item to parse.
            regex:
                The regular expression to search for in ``item``. Its capture groups are converted to integers and
                passed, in order, as positional arguments to ``datetime`` (year, month, day, hour, minute, ...).
            timezone:
                The timezone to add to the datetime object. Defaults to ``None``, which means ``UTC`` will be used.

        Returns:
            The parsed datetime object, if successful.

        Raises:
            ValueError:
                If the given item cannot be parsed, i.e. the regular expression does not match or the captured
                groups do not form a valid datetime.

        Example:
            >>> regex = r"^((?:19|20)\d{2})(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])_(0\d|1\d|2[0-3])_([0-5]\d)$"
            >>> DateTimeParserBase.parse_by_regex("20230102_22_30", regex)
            datetime.datetime(2023, 1, 2, 22, 30, tzinfo=zoneinfo.ZoneInfo(key='UTC'))
        """
        if timezone is None:
            timezone = ZoneInfo("UTC")

        match = re.search(regex, item)
        if match is None:
            DateTimeParserBase._raise_value_error(item)

        try:
            return datetime(*(int(group) for group in match.groups()), tzinfo=timezone)
        except (ValueError, TypeError):
            # ``ValueError``: out-of-range or non-numeric groups. ``TypeError``: a group that did not participate
            # in the match (``None``) or a number of groups that ``datetime`` does not accept.
            DateTimeParserBase._raise_value_error(item)

    @staticmethod
    @validate_call
    def parse_by_format_string(item: str, datetime_format_string: str, timezone: ZoneInfo | None = None) -> datetime:
        """Parse the given item into a datetime object using a ``strptime`` format string.

        Args:
            item:
                The item to parse.
            datetime_format_string:
                The format string, as accepted by ``datetime.strptime``, e.g. ``"%Y%m%d%H%M"``.
            timezone:
                The timezone to set on the datetime object. Defaults to ``None``, which means ``UTC`` will be used.
                Any timezone obtained from the item itself is replaced by this one.

        Returns:
            The parsed datetime object, if successful.

        Raises:
            ValueError:
                If the given item cannot be parsed.

        Example:
            >>> DateTimeParserBase.parse_by_format_string("202301022230", "%Y%m%d%H%M")
            datetime.datetime(2023, 1, 2, 22, 30, tzinfo=zoneinfo.ZoneInfo(key='UTC'))
        """
        if timezone is None:
            timezone = ZoneInfo("UTC")

        try:
            return datetime.strptime(item, datetime_format_string).replace(tzinfo=timezone)
        except ValueError:
            DateTimeParserBase._raise_value_error(item)

    @classmethod
    def parse_collection(
            cls, items: ListSetTuple[Any] | Generator
    ) -> ListSetTuple[datetime] | Generator[datetime, None, None]:
        """Parse the given collection of items into a collection of datetime objects.

        Each item is parsed with ``cls.parse``, i.e. the ``parse`` method of the class on which this method is called.

        Args:
            items:
                The collection (list/set/tuple or generator) of items to parse.

        Returns:
            A collection of datetime objects. The type of collection matches the type of the input collection, e.g.
            a list as input results in a list of datetime objects.
        """
        return apply_to_single_or_collection(cls.parse, items)

    @staticmethod
    def parse(item: Any) -> Any:
        """Return the given item as is.

        Warning:
            Override this static method for each derived class.
        """
        return item
class SeviriIDParser(DateTimeParserBase):
    """Static parser class for SEVIRI product IDs."""

    # The five capture groups are, in order: year, month, day, hour, and minute. The two digits that follow the
    # minute (presumably the seconds) and the fractional part are matched but not captured.
    regex = (
        r"[0-9A-Za-z]+-SEVI-[0-9A-Za-z]+-[0-9]+-NA"
        r"-([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})[0-9]{2}\.[0-9]+Z-NA"
    )

    @staticmethod
    @validate_call
    def parse(seviri_product_id: str) -> datetime:
        """Parse the given SEVIRI product ID into a datetime object.

        The ID is searched for ``SeviriIDParser.regex`` via ``DateTimeParserBase.parse_by_regex``. No timezone is
        passed explicitly, so the default of that method applies.

        Args:
            seviri_product_id:
                The SEVIRI product ID to parse.

        Returns:
            The datetime object built from the year, month, day, hour, and minute found in the ID.

        Example:
            >>> SeviriIDParser.parse("MSG3-SEVI-MSG15-0100-NA-20150731221240.036000000Z-NA")
            datetime.datetime(2015, 7, 31, 22, 12, tzinfo=zoneinfo.ZoneInfo(key='UTC'))
        """
        pattern = SeviriIDParser.regex
        return DateTimeParserBase.parse_by_regex(seviri_product_id, pattern)
class FCIIDParser(DateTimeParserBase):
    """Static parser class for FCI product IDs."""

    @staticmethod
    @validate_call
    def parse(fci_product_id: str) -> datetime:
        """Parse the given FCI product ID into a datetime object.

        The timestamp is taken from a fixed position counted from the end of the ID: the 12 characters
        (``YYYYmmDDHHMM``) that start 44 characters before the end. In the examples below, this is the second of
        the three timestamps of the ID, without its last two digits.

        Args:
            fci_product_id:
                The FCI product ID to parse.

        Returns:
            The result of ``DateTimeParserBase.parse_by_format_string`` for the extracted timestamp and the format
            string ``"%Y%m%d%H%M"``.

        Example:
            >>> FCIIDParser.parse(
            ...  "W_XX-EUMETSAT-Darmstadt,IMG+SAT,MTI1+FCI-1C-RRAD-FDHSI-FD--x-x---x_C_EUMT_"
            ...  "20251216091032_IDPFI_OPE_20251216091007_20251216091923_N__O_0056_0000"
            ... )
            datetime.datetime(2025, 12, 16, 9, 10, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> FCIIDParser.parse(
            ...  "W_XX-EUMETSAT-Darmstadt,IMG+SAT,MTI1+FCI-1C-RRAD-HRFI-FD--x-x---x_C_EUMT_"
            ...  "20250102102250_IDPFI_OPE_20250102102007_20250102102924_N__O_0063_0000"
            ... )
            datetime.datetime(2025, 1, 2, 10, 20, tzinfo=zoneinfo.ZoneInfo(key='UTC'))
        """
        # Negative indices are used since the position of the timestamp is fixed relative to the end of the ID.
        timestamp = fci_product_id[-44:-32]
        return DateTimeParserBase.parse_by_format_string(timestamp, "%Y%m%d%H%M")
class ChimpFilePathParser(DateTimeParserBase):
    """Static parser class for CHIMP-compliant input and output file paths."""

    # The five capture groups are, in order: year, month, day, hour, and minute.
    regex = r"[0-9A-Za-z]+_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})_([0-9]{2})"

    @staticmethod
    @validate_call
    def parse(filepath: PathLikeType) -> datetime:
        """Parse the given filepath into a datetime object.

        Only the stem of the path (``filepath.stem``) is searched for ``ChimpFilePathParser.regex``.

        Args:
            filepath:
                The filepath to parse. It can be either an absolute path or a relative path (e.g. just the base name).
                For the parsing to be successful, the ``filepath`` must have the following format:
                ``<optional_path><prefix>_<YYYYmmDD>_<HH>_<MM><optional_extension>``, where ``<prefix>`` is
                mandatory and must end with at least one ASCII letter or digit, i.e. it cannot be an empty string.
                See the examples below.

        Returns:
            The datetime object built from the year, month, day, hour, and minute found in the stem of the path.

        Examples:
            >>> from pathlib import Path
            >>>
            >>> # Input is an absolute path of type `Path`.
            >>> ChimpFilePathParser.parse(Path("/home/user/dir/seviri_20150731_22_12.extension"))
            datetime.datetime(2015, 7, 31, 22, 12, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is an absolute path of type `Path`.
            >>> ChimpFilePathParser.parse(Path("/home/user/dir/seviri_20150110_00_01.extension"))
            datetime.datetime(2015, 1, 10, 0, 1, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is a relative path of type `Path`.
            >>> ChimpFilePathParser.parse(Path("chimp_20150731_22_12.extension"))
            datetime.datetime(2015, 7, 31, 22, 12, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is an absolute path of type `str`.
            >>> ChimpFilePathParser.parse("/home/user/dir/prefix_20150731_22_12.extension")
            datetime.datetime(2015, 7, 31, 22, 12, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is a relative path of type `str` and does not have an extension.
            >>> ChimpFilePathParser.parse("seviri_20150731_22_12")
            datetime.datetime(2015, 7, 31, 22, 12, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is a relative path of type `str` and its extension is numeric, i.e. `72`.
            >>> ChimpFilePathParser.parse("p_20150731_22_1272")
            datetime.datetime(2015, 7, 31, 22, 12, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is invalid (missing prefix). The following will raise an exception!
            >>> # ChimpFilePathParser.parse("20150731_22_12")

            >>> # Input is invalid (empty prefix). The following will raise an exception!
            >>> # ChimpFilePathParser.parse("_20150731_22_12")
        """
        stem = str(filepath.stem)
        return DateTimeParserBase.parse_by_regex(stem, ChimpFilePathParser.regex)
class HritFilePathParser(DateTimeParserBase):
    """Static parser class for HRIT file paths."""

    @staticmethod
    @validate_call
    def parse(filepath: PathLikeType) -> datetime:
        """Parse the given filepath into a datetime object.

        The timestamp is taken from a fixed position counted from the end of the stem of the path
        (``filepath.stem``): the 12 characters that precede the last 3 characters.

        Args:
            filepath:
                The HRIT filepath to parse. It can be either an absolute path or a relative path
                (e.g. just the base name). For the parsing to be successful, the ``filepath`` must have the following
                format: ``<optional_path><optional_prefix><YYYYmmDDHHMM>-__``. See the examples below.

        Returns:
            The result of ``DateTimeParserBase.parse_by_format_string`` for the extracted timestamp and the format
            string ``"%Y%m%d%H%M"``.

        Examples:
            >>> from pathlib import Path
            >>>
            >>> # Input is an absolute path of type `Path`.
            >>> HritFilePathParser.parse(
            ...  Path("/home/user/dir/H-000-MSG3__-MSG3________-WV_073___-000008___-202503041900-__")
            ... )
            datetime.datetime(2025, 3, 4, 19, 0, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is a relative path of type `Path`.
            >>> HritFilePathParser.parse(Path("H-000-MSG3__-MSG3________-WV_073___-000008___-202503041900-__"))
            datetime.datetime(2025, 3, 4, 19, 0, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is a relative path of type `Path` without a prefix.
            >>> HritFilePathParser.parse(Path("202503041900-__"))
            datetime.datetime(2025, 3, 4, 19, 0, tzinfo=zoneinfo.ZoneInfo(key='UTC'))

            >>> # Input is invalid as it misses the mandatory trailing `-__`. The following will raise an exception!
            >>> # HritFilePathParser.parse(Path("202503041900"))
        """
        stem = str(filepath.stem)
        # Drop the last 3 characters (``-__`` in the expected format) and keep the 12 characters before them.
        timestamp = stem[-15:-3]
        return DateTimeParserBase.parse_by_format_string(timestamp, "%Y%m%d%H%M")
# Union of the concrete parser classes defined in this module. ``FCIIDParser`` is included so that the alias covers
# all of them; the members that were already part of the union are unchanged.
DateTimeParser = SeviriIDParser | FCIIDParser | ChimpFilePathParser | HritFilePathParser