Source code for genno.compat.pyam.operator

import logging
import re
from collections.abc import Callable, Collection, Hashable, Iterable, Mapping
from functools import partial
from os import PathLike
from pathlib import Path
from typing import TYPE_CHECKING, Any
from warnings import warn

import genno
import genno.operator
from genno.core.key import Key, KeyLike
from genno.core.operator import Operator

from . import HAS_PYAM, util

if TYPE_CHECKING:
    from collections.abc import MutableMapping

    import pandas

    from genno.core.computer import Computer
    from genno.types import AnyQuantity, HasScenarioIdentifiers, TQuantity

# Names exported as the public operators of this module
__all__ = [
    "as_pyam",
    "quantity_from_iamc",
]

# Module-level logger, named after this module
log = logging.getLogger(__name__)

# The name `IamDataFrame` must exist at module level in either case: it is used in
# the (string) annotations of the functions below that are decorated with
# `….register`.
if HAS_PYAM:
    from pyam import IamDataFrame
else:
    # pyam not available → define a dummy class for register() decorators

    # Empty placeholder class; it only provides the name
    class IamDataFrame:  # type: ignore [no-redef]
        pass


@Operator.define()
def as_pyam(
    scenario: "HasScenarioIdentifiers",
    quantity: "AnyQuantity",
    *,
    rename: Mapping[str, str] | None = None,
    collapse: Callable | None = None,
    replace: "MutableMapping | None" = None,
    drop: Collection[str] | str = "auto",
    unit: str | None = None,
    prepend_name: bool = True,
    model_name: str | None = None,
    scenario_name: str | None = None,
) -> "IamDataFrame":
    """Return a :class:`pyam.IamDataFrame` containing the data from `quantity`.

    Warnings are logged if the arguments result in additional, unhandled columns in the
    resulting data frame that are not part of the IAMC spec.

    The conversion has the following steps:

    1. `quantity` is converted to a temporary :class:`pandas.DataFrame`.
    2. Labels for the following IAMC dimensions are filled:

       - ``model``, ``scenario``: from attributes of the `scenario`, `model_name`,
         and/or `scenario_name` argument(s).
       - ``variable``: from the :attr:`~.Quantity.name` of `quantity`, if any.
       - ``unit``: from the :attr:`~.Quantity.units` of `quantity`, if any.

    3. The actions specified by the optional arguments `rename`, `collapse`, `replace`,
       `drop`, and `unit`, if any, are applied in that order.
    4. The resulting data frame is converted to :class:`pyam.IamDataFrame`.

    Parameters
    ----------
    scenario :
        Any object with :py:`model` and :py:`scenario` attributes of type
        :class:`str`, for instance an :class:`ixmp.Scenario` or
        :class:`~message_ix_models.util.scenarioinfo.ScenarioInfo`; **or** a
        :class:`str`, which is equivalent to `scenario_name`.
    quantity : .Quantity
        Quantity to convert to IAMC data structure.
    rename : dict, optional
        Mapping from dimension names in `quantity` (:class:`str`) to column names
        (:class:`str`); either IAMC dimension names, or others that are consumed by
        `collapse`.
    collapse : callable, optional
        Function that takes a :class:`pandas.DataFrame` and returns the same type.
        This function **may** collapse 2 or more dimensions, for example to construct
        labels for the IAMC ``variable`` dimension, or any other. If not given,
        :func:`.util.collapse` is used.
    replace : optional
        Values to be replaced and their replacements. Passed directly to
        :meth:`pandas.DataFrame.replace` with :py:`regex=True`. If omitted or
        :any:`None`, an empty mapping is used.
    drop : str or collection of str, optional
        Columns to drop. Passed to :func:`.util.drop`, so if not given, all non-IAMC
        columns are dropped.
    unit : str, optional
        Label for the IAMC ``unit`` dimension. Passed to
        :func:`~.pyam.util.clean_units`.
    prepend_name : bool, optional
        If :any:`True`, the :attr:`.Quantity.name` of `quantity` is prepended to the
        IAMC ``variable`` dimension.
    model_name : str, optional
        Value for the IAMC ``model`` dimension. Used only if `scenario` does not
        provide :py:`model` and :py:`scenario` attributes.
    scenario_name : str, optional
        Value for the IAMC ``scenario`` dimension. Used only if `scenario` does not
        provide :py:`model` and :py:`scenario` attributes.

    Raises
    ------
    ValueError
        If the resulting data frame has duplicate keys in the IAMC dimensions.
        :class:`pyam.IamDataFrame` cannot handle such data.
    TypeError
        If both `scenario` and `scenario_name` are non-empty :class:`str`.
    """
    import pyam

    # Values to assign on all rows
    assign = dict(unit=quantity.units)

    if prepend_name:
        assign.update(variable=quantity.name)

    try:
        assign.update(model=scenario.model, scenario=scenario.scenario)
    except AttributeError:
        # `scenario` carries no identifiers; treat it like `scenario_name`
        if scenario and scenario_name:
            raise TypeError(f"Both {scenario=!r} and {scenario_name=!r} given")
        assign.update(model=model_name or "", scenario=scenario or scenario_name or "")

    # - Convert to pd.DataFrame
    # - Fill variable, unit, model, and scenario columns
    # - Rename dimensions/columns, for instance one dimension to 'year' or 'time'
    # - Apply the collapse callback, or the default from .util
    # - Replace values
    # - Drop any unwanted columns
    # - Clean units
    df = (
        quantity.to_series()
        .rename("value")
        .reset_index()
        .assign(**assign)
        .rename(columns=rename or dict())
        .pipe(collapse or util.collapse)
        .replace(replace if replace is not None else {}, regex=True)
        .pipe(util.drop, columns=drop)
        .pipe(util.clean_units, unit)
    )

    # Raise exception for non-unique data. Pass the key columns as an ordered list:
    # `subset` is documented as a label or sequence of labels, which a set is not.
    key_columns = [column for column in df.columns if column != "value"]
    duplicates = df.duplicated(subset=key_columns)
    if duplicates.any():
        raise ValueError(
            "Duplicate IAMC indices cannot be converted:\n"
            + str(df[duplicates].drop(columns=["model", "scenario"]))
        )

    return pyam.IamDataFrame(df)
@as_pyam.helper
def add_as_pyam(
    func: Callable,
    c: "Computer",
    quantities: KeyLike | Iterable[KeyLike],
    tag: str = "iamc",
    /,
    **kwargs: Any,
) -> KeyLike | tuple[KeyLike, ...]:
    """:meth:`.Computer.add` helper for :func:`.as_pyam`.

    Add conversion of one or more `quantities` to the IAMC data structure.

    Parameters
    ----------
    func : callable
        Callable used for every added task, with `kwargs` bound to it.
    c : .Computer
        Computer to which the tasks are added.
    quantities : str or .Key or list of str or .Key
        Keys for quantities to transform.
    tag : str, optional
        Tag to append to new Keys.

    Other parameters
    ----------------
    kwargs :
        Any keyword arguments accepted by :func:`.as_pyam`. A `replace` value that is
        not a nested mapping is wrapped as :py:`dict(variable=...)`, with a
        :class:`DeprecationWarning`.

    Returns
    -------
    .Key or tuple of .Key
        A single key if `quantities` was a single :class:`str` or :class:`.Key`;
        otherwise a tuple with one key per item of `quantities`. Each task converts a
        :class:`.Quantity` into a :class:`pyam.IamDataFrame`.
    """
    # Handle single vs. iterable of inputs
    if isinstance(quantities, (str, Key)):
        quantities = [quantities]
        multi_arg = False
    else:
        multi_arg = True

    replace = kwargs.get("replace")
    if replace is None:
        # Treat an explicit None like an omitted argument, so `func` uses its default
        kwargs.pop("replace", None)
    elif len(replace) and not isinstance(next(iter(replace.values())), dict):
        # Deprecated flat form: interpret as replacements in the "variable" column
        kwargs["replace"] = dict(variable=kwargs.pop("replace"))
        warn(
            f"replace must be nested dict(), e.g. {repr(kwargs['replace'])}",
            DeprecationWarning,
        )

    # Check keys
    quantities = c.check_keys(*quantities)

    # The callable for the task, with all keyword arguments bound
    comp = partial(func, **kwargs)

    keys = []
    for qty in quantities:
        # Key for the input quantity, e.g. foo:x-y-z
        key = Key(qty)

        # Key for the task/output, e.g. foo::iamc
        out_key = Key(key.name, tag=tag)

        # Add the task and store the key. The task takes two positional inputs: the
        # key "scenario" and the input quantity.
        c.add_single(out_key, (comp, "scenario", key))
        keys.append(out_key)

    return tuple(keys) if multi_arg else keys[0]
@genno.operator.concat.register
def _(*args: "IamDataFrame", **kwargs: Any) -> "IamDataFrame":
    """Combine several :class:`pyam.IamDataFrame` objects into one.

    This is the implementation registered on :func:`genno.operator.concat` for
    :class:`pyam.IamDataFrame` arguments. All positional `args` are handed, as one
    tuple, to :func:`pyam.concat`, together with any `kwargs`.
    """
    # Imported here so this module can be imported without pyam installed
    from pyam import concat as pyam_concat

    return pyam_concat(args, **kwargs)
def quantity_from_iamc(
    qty: "TQuantity | IamDataFrame | pandas.DataFrame",
    variable: str,
    *,
    fail: int | str = "warning",
) -> "TQuantity":
    """Extract data for a single measure from `qty` with IAMC-like structure.

    Parameters
    ----------
    qty :
        Must have at least 2 dimensions named ‘v’ (or ‘variable’, any case) and ‘u’
        (or ‘unit’, any case). A :class:`pandas.DataFrame` is first converted to
        :class:`pyam.IamDataFrame`, and a :class:`pyam.IamDataFrame` to
        :class:`.Quantity`.
    variable : str
        Regular expression to match full labels on the ``v`` dimension of `qty`. If
        the expression contains match groups, they are used to rewrite ``v`` labels:
        only the contents of the first match group are kept. This may be used to
        discard a portion of the label.
    fail : int or str, optional
        Passed to :func:`.unique_units_from_dim`.

    Returns
    -------
    .Quantity
        The ‘variable’ dimension contains reduced labels.
        The :attr:`.Quantity.units` attribute contains the unique units for the
        subset of data.

    Raises
    ------
    ValueError
        If `qty` does not have exactly 1 dimension matching each of ‘v’/‘variable’
        and ‘u’/‘unit’.

    See also
    --------
    unique_units_from_dim
    """
    import pandas as pd
    from pyam import IamDataFrame

    from genno.operator import relabel, select, unique_units_from_dim

    from .util import IAMC_DIMS

    if isinstance(qty, pd.DataFrame):
        # Convert pandas.DataFrame to pyam.IamDataFrame
        qty = IamDataFrame(qty)

    if isinstance(qty, IamDataFrame):
        # Convert IamDataFrame to Quantity
        df = qty.as_pandas()
        # Select the index columns in data frame order. Iterating over a set
        # intersection instead would give an order that can differ between runs.
        index_columns = [column for column in df.columns if column in IAMC_DIMS]
        qty = genno.Quantity(df.set_index(index_columns)["value"])

    assert isinstance(qty, genno.Quantity)

    # Identify a dimension whose name is in `targets`
    def identify_dim(targets: Collection[str]) -> Hashable:
        result = [d for d in qty.dims if str(d).lower() in targets]
        if len(result) != 1:
            raise ValueError(
                f"cannot identify 1 unique dimension for {targets!r} among "
                f"{qty.dims!r}; found {result!r}"
            )
        return result[0]

    v_dim = identify_dim(("v", "variable"))
    u_dim = identify_dim(("u", "unit"))

    # Compile expression
    expr = re.compile(variable)
    has_group = expr.groups > 0

    # Process each label along v_dim: collect the fully-matching labels and, if the
    # expression has groups, the shortened label for each
    variables, replacements = [], {}
    for var in qty.coords[v_dim].data:
        if match := expr.fullmatch(var):
            variables.append(match.group(0))
            if has_group:
                replacements[match.group(0)] = match.group(1)

    if not variables:
        log.warning(
            f"0 of {len(qty.coords[v_dim])} labels on dimension {v_dim!r} were a full "
            f"match for {expr!r}"
        )

    # Subset to the matched labels, shorten them, then move units from u_dim to the
    # Quantity's units attribute
    return (
        qty.pipe(select, {v_dim: variables})
        .pipe(relabel, {v_dim: replacements})
        .pipe(unique_units_from_dim, u_dim, fail=fail)
    )
@genno.operator.write_report.register
def _(quantity: "IamDataFrame", path: PathLike, kwargs: Any = None) -> None:
    """Write the :class:`pyam.IamDataFrame` `quantity` to the file at `path`.

    This is the implementation registered on :func:`genno.operator.write_report` for
    :class:`pyam.IamDataFrame`. The suffix of `path` selects the method of `quantity`
    that writes the file: ``.csv`` → :py:`to_csv()`; ``.xlsx`` → :py:`to_excel()`.

    Raises
    ------
    NotImplementedError
        If `kwargs` is given and not empty.
    ValueError
        If `path` has any suffix other than ``.csv`` or ``.xlsx``.
    """
    target = Path(path)

    has_kwargs = kwargs is not None and len(kwargs) > 0
    if has_kwargs:
        raise NotImplementedError(
            "Keyword arguments to write_report(pyam.IamDataFrame, …)"
        )

    # Name of the IamDataFrame method that handles each supported suffix
    method_name = {".csv": "to_csv", ".xlsx": "to_excel"}.get(target.suffix)
    if method_name is None:
        raise ValueError(
            f"pyam.IamDataFrame can be written to .csv or .xlsx, not {target.suffix}"
        )

    getattr(quantity, method_name)(target)


def __getattr__(name: str) -> Any:
    """Module-level attribute fallback for names formerly importable from here.

    For "concat" and "write_report", emit a :class:`DeprecationWarning` and return the
    object of the same name from :mod:`genno.operator`. Any other name raises
    :class:`AttributeError`.
    """
    if name not in ("concat", "write_report"):
        raise AttributeError(name)

    message = (
        f"Importing {name!r} from genno.compat.pyam.operator; import from "
        "genno.operator instead."
    )
    warn(message, DeprecationWarning, stacklevel=2)
    return getattr(genno.operator, name)