pandas_openscm.testing#

Testing helpers

These helpers live inside the package, rather than in our tests directory, so that we do not have to put __init__.py files in our tests directory. For details, see https://docs.pytest.org/en/stable/explanation/goodpractices.html#which-import-mode and https://docs.pytest.org/en/stable/explanation/pythonpath.html#pytest-import-mechanisms-and-sys-path-pythonpath.

Functions:

Name	Description
`assert_frame_alike`	Assert that two pd.DataFrame are alike
`assert_move_plan_equal`	Assert that two MovePlan are equal
`changer`	Change a value
`check_result`	Check result in the case where it could be multiple types
`convert_to_desired_type`	Convert a `df` to the desired type for testing
`create_test_df`	Create a pd.DataFrame to use in testing

assert_frame_alike #

assert_frame_alike(
    res: DataFrame,
    exp: DataFrame,
    check_like: bool = True,
    **kwargs: Any,
) -> None

Assert that two pd.DataFrame are alike

Here, alike means that they have the same data, just potentially not in the same order. This includes the order of index levels, which may also differ.

Parameters:

Name	Type	Description	Default
`res`	`DataFrame`	Result to check	required
`exp`	`DataFrame`	Expected result	required
`check_like`	`bool`	Passed to assert_frame_equal	`True`
`**kwargs`	`Any`	Passed to assert_frame_equal	`{}`

Source code in src/pandas_openscm/testing.py

def assert_frame_alike(
    res: pd.DataFrame, exp: pd.DataFrame, check_like: bool = True, **kwargs: Any
) -> None:
    """
    Check that two [pd.DataFrame][pandas.DataFrame] hold the same data

    "Alike" is looser than "equal":
    the index levels of `res` are first put into the order used by `exp`,
    and (with the default `check_like=True`)
    the ordering of rows and columns is ignored too.

    Parameters
    ----------
    res
        Result to check

    exp
        Expected result

    check_like
        Forwarded to [assert_frame_equal][pandas.testing.assert_frame_equal]

    **kwargs
        Forwarded to [assert_frame_equal][pandas.testing.assert_frame_equal]
    """
    # Align the index level order of the result with the expectation
    # before delegating the actual comparison to pandas.
    expected_level_order = exp.index.names
    res_aligned = res.reorder_levels(expected_level_order)  # type: ignore # pandas-stubs confused

    pd.testing.assert_frame_equal(res_aligned, exp, check_like=check_like, **kwargs)

assert_move_plan_equal #

assert_move_plan_equal(
    res: MovePlan, exp: MovePlan
) -> None

Assert that two MovePlan are equal

Parameters:

Name	Type	Description	Default
`res`	`MovePlan`	The result	required
`exp`	`MovePlan`	The expectation	required

Raises:

Type	Description
`AssertionError`	`res` and `exp` are not equal

Source code in src/pandas_openscm/testing.py

def assert_move_plan_equal(res: MovePlan, exp: MovePlan) -> None:
    """
    Check that two [MovePlan][(p).db.rewriting.] describe the same plan

    The moved index, the moved file map, the re-write actions
    and the paths to delete are compared in turn.

    Parameters
    ----------
    res
        The result

    exp
        The expectation

    Raises
    ------
    AssertionError
        `res` and `exp` are not equal
    """
    # Compare the moved indexes as MultiIndex objects
    # built from the frames with their own index reset,
    # ignoring the order of the entries.
    res_moved = pd.MultiIndex.from_frame(res.moved_index.reset_index())
    exp_moved = pd.MultiIndex.from_frame(exp.moved_index.reset_index())
    pd.testing.assert_index_equal(res_moved, exp_moved, check_order=False)

    pd.testing.assert_series_equal(
        res.moved_file_map, exp.moved_file_map, check_like=True
    )

    if res.rewrite_actions is None:
        assert exp.rewrite_actions is None
    elif exp.rewrite_actions is None:
        msg = f"{exp.rewrite_actions=} while {res.rewrite_actions=}"
        raise AssertionError(msg)
    else:
        assert len(res.rewrite_actions) == len(exp.rewrite_actions)

        for res_rwa in res.rewrite_actions:
            # Actions are paired up by the file they read from:
            # the first expected action with the same `from_file` is used.
            partner = next(
                (
                    candidate
                    for candidate in exp.rewrite_actions
                    if candidate.from_file == res_rwa.from_file
                ),
                None,
            )
            if partner is None:
                msg = f"Did not find pair for\n{res_rwa=}\nin\n{exp.rewrite_actions=}"
                raise AssertionError(msg)

            pd.testing.assert_index_equal(
                res_rwa.locator, partner.locator, check_order=False
            )
            assert res_rwa.to_file == partner.to_file

    if res.delete_paths is None:
        assert exp.delete_paths is None
    elif exp.delete_paths is None:
        msg = f"{exp.delete_paths=} while {res.delete_paths=}"
        raise AssertionError(msg)
    else:
        # Only the set of paths matters, not their order
        assert set(res.delete_paths) == set(exp.delete_paths)

changer #

changer(
    inv: float, factor: float, *, exponent: float = 1.0
) -> float

Change a value

This is just meant as a helper for our tests

Source code in src/pandas_openscm/testing.py

def changer(inv: float, factor: float, *, exponent: float = 1.0) -> float:
    """
    Change a value

    This is just meant as a helper for our tests.
    The input is scaled by `factor`, then raised to the power `exponent`.

    Parameters
    ----------
    inv
        Input value

    factor
        Factor by which to multiply `inv`

    exponent
        Power to which the scaled value is raised

    Returns
    -------
    :
        `(inv * factor) ** exponent`
    """
    # Use the caller-supplied factor rather than a hard-coded multiplier,
    # otherwise the `factor` argument has no effect on the result.
    return cast(float, (inv * factor) ** exponent)

check_result #

check_result(res: P, exp: P) -> None

Check result in the case where it could be multiple types

Specifically, pd.DataFrame or pd.Series.

This is a thin wrapper. If you want specific functionality, use the underlying function.

Parameters:

Name	Type	Description	Default
`res`	`P`	Result	required
`exp`	`P`	Expected	required

Raises:

Type	Description
`TypeError`	Type of `res` is not the same as the type of `exp`

Source code in src/pandas_openscm/testing.py

def check_result(res: P, exp: P) -> None:
    """
    Compare a result with its expectation, whichever pandas type it is

    Supported types are [pd.DataFrame][pandas.DataFrame]
    and [pd.Series][pandas.Series].

    This only dispatches to the matching comparison function.
    If you need more control, call the underlying function directly.

    Parameters
    ----------
    res
        Result

    exp
        Expected

    Raises
    ------
    TypeError
        Type of `res` is not the same as the type of `exp`
    """
    if isinstance(res, pd.Series):
        if isinstance(exp, pd.Series):
            pd.testing.assert_series_equal(res, exp)
            return

        msg = f"{type(res)=} while {type(exp)=}"  # pragma: no cover
        raise TypeError(msg)  # pragma: no cover

    if isinstance(res, pd.DataFrame):
        if isinstance(exp, pd.DataFrame):
            assert_frame_alike(res, exp)
            return

        msg = f"{type(res)=} while {type(exp)=}"  # pragma: no cover
        raise TypeError(msg)  # pragma: no cover

    # Neither a Series nor a DataFrame: nothing we know how to compare
    raise NotImplementedError(type(res))  # pragma: no cover

convert_to_desired_type #

convert_to_desired_type(
    df: DataFrame, pobj_type: Literal["DataFrame"]
) -> DataFrame

convert_to_desired_type(
    df: DataFrame, pobj_type: Literal["Series"]
) -> Series[Any]

convert_to_desired_type(
    df: DataFrame, pobj_type: Literal["DataFrame", "Series"]
) -> DataFrame | Series[Any]

Convert a df to the desired type for testing

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	pd.DataFrame to convert	required
`pobj_type`	`Literal['DataFrame', 'Series']`	Type to convert to. If "DataFrame", then `df` is simply returned. If "Series", then the first column of `df` is returned.	required

Returns:

Type	Description
`DataFrame \| Series[Any]`	`df` converted to the desired type

Source code in src/pandas_openscm/testing.py

def convert_to_desired_type(
    df: pd.DataFrame, pobj_type: Literal["DataFrame", "Series"]
) -> pd.DataFrame | pd.Series[Any]:
    """
    Turn `df` into the pandas object type wanted by a test

    Parameters
    ----------
    df
        [pd.DataFrame][pandas.DataFrame] to convert

    pobj_type
        Type to convert to

        If "DataFrame", `df` itself is returned.
        If "Series", the first column of `df` is returned.

    Returns
    -------
    :
        `df` converted to the desired type
    """
    if pobj_type == "Series":
        first_column = df.columns[0]
        return df[first_column]

    if pobj_type == "DataFrame":
        return df

    raise NotImplementedError(pobj_type)  # pragma: no cover

create_test_df #

create_test_df(
    *,
    variables: Collection[tuple[str, str]],
    n_scenarios: int,
    n_runs: int,
    timepoints: NDArray[floating[Any]],
    rng: Generator | None = None,
) -> DataFrame

Create a pd.DataFrame to use in testing

This uses the idea of simple climate model runs, where you have a number of scenarios, each of which has a number of variables from a number of different model runs with output for a number of different time points.

The result will contain all combinations of scenarios, variables and runs, with the units being defined by each variable.

Parameters:

Name	Type	Description	Default
`variables`	`Collection[tuple[str, str]]`	Variables and their units to create	required
`n_scenarios`	`int`	Number of scenarios to create. These are simply incremented with their number.	required
`n_runs`	`int`	Number of runs to create. These are simply numbered.	required
`timepoints`	`NDArray[floating[Any]]`	Time points to use with the data.	required
`rng`	`Generator \| None`	Random number generator to use. If not supplied, we create an instance using numpy.random.default_rng.	`None`

Returns:

Type	Description
`DataFrame`	Generated test pd.DataFrame.

Source code in src/pandas_openscm/testing.py

def create_test_df(
    *,
    variables: Collection[tuple[str, str]],
    n_scenarios: int,
    n_runs: int,
    timepoints: np.typing.NDArray[np.floating[Any]],
    rng: np.random.Generator | None = None,
) -> pd.DataFrame:
    """
    Build a [pd.DataFrame][pandas.DataFrame] for use in tests

    The data mimics output from simple climate model runs:
    a number of scenarios,
    each with a number of variables
    from a number of model runs,
    with values at a number of time points.

    Every combination of scenario, variable and run gets a row.
    The unit of each row is the unit given for its variable.

    Parameters
    ----------
    variables
        Variables and their units to create

    n_scenarios
        Number of scenarios to create.

        These are named `scenario_<number>`, counting from zero.

    n_runs
        Number of runs to create.

        These are simply numbered, counting from zero.

    timepoints
        Time points to use with the data.

        These become the columns of the result.

    rng
        Random number generator to use.

        If not supplied, we create an instance using
        [numpy.random.default_rng](https://numpy.org/doc/stable/reference/random/generator.html#numpy.random.default_rng).

    Returns
    -------
    :
        Generated test [pd.DataFrame][pandas.DataFrame].
    """
    generator = np.random.default_rng() if rng is None else rng

    scenario_names = [f"scenario_{i}" for i in range(n_scenarios)]
    index_rows = [
        (scenario, variable, run, unit)
        for scenario, (variable, unit), run in itertools.product(
            scenario_names, variables, range(n_runs)
        )
    ]
    index_frame = pd.DataFrame(
        index_rows, columns=["scenario", "variable", "run", "unit"]
    )
    idx = pd.MultiIndex.from_frame(index_frame)

    n_ts = n_scenarios * len(variables) * n_runs
    n_times = timepoints.size

    # Give the data a bit of structure so it looks different when plotted:
    # each timeseries gets its own scaling, and all of them ramp up over time.
    per_series_scale = 50.0 * np.linspace(0.3, 1, n_ts)[:, np.newaxis]
    ramp_over_time = np.linspace(0, 1, n_times)[np.newaxis, :]
    structured = per_series_scale * ramp_over_time
    noise = generator.random((n_ts, n_times))
    values = structured + noise

    return pd.DataFrame(values, columns=timepoints, index=idx)