Skip to content

pandas_openscm.testing#

Testing helpers

Placed here to avoid putting __init__.py files in our tests directory, see details here: https://docs.pytest.org/en/stable/explanation/goodpractices.html#which-import-mode. Also see here: https://docs.pytest.org/en/stable/explanation/pythonpath.html#pytest-import-mechanisms-and-sys-path-pythonpath.

Functions:

Name Description
assert_frame_alike

Assert that two pd.DataFrame are alike

assert_move_plan_equal

Assert that two MovePlan are equal

changer

Change a value

check_result

Check result in the case where it could be multiple types

convert_to_desired_type

Convert a df to the desired type for testing

create_test_df

Create a pd.DataFrame to use in testing

assert_frame_alike #

assert_frame_alike(
    res: DataFrame,
    exp: DataFrame,
    check_like: bool = True,
    **kwargs: Any,
) -> None

Assert that two pd.DataFrame are alike

Here, alike means that they have the same data, just potentially not in the same order. This includes the order of index levels, which may also differ.

Parameters:

Name Type Description Default
res DataFrame

Result to check

required
exp DataFrame

Expected result

required
check_like bool

Passed to assert_frame_equal

True
**kwargs Any

Passed to assert_frame_equal

{}
Source code in src/pandas_openscm/testing.py
def assert_frame_alike(
    res: pd.DataFrame, exp: pd.DataFrame, check_like: bool = True, **kwargs: Any
) -> None:
    """
    Check that two [pd.DataFrame][pandas.DataFrame] carry the same data

    Two frames count as alike when their contents match,
    even if the ordering differs.
    The index levels of `res` are put into the order used by `exp`
    before the comparison, so the level order may differ too.

    Parameters
    ----------
    res
        Frame produced by the code under test

    exp
        Frame we expect to get

    check_like
        Forwarded to [assert_frame_equal][pandas.testing.assert_frame_equal]

    **kwargs
        Forwarded to [assert_frame_equal][pandas.testing.assert_frame_equal]
    """
    # Line the index levels of `res` up with those of `exp`
    # so that a different level order alone cannot cause a failure.
    res_levels_aligned = res.reorder_levels(exp.index.names)  # type: ignore # pandas-stubs confused

    pd.testing.assert_frame_equal(
        res_levels_aligned, exp, check_like=check_like, **kwargs
    )

assert_move_plan_equal #

assert_move_plan_equal(
    res: MovePlan, exp: MovePlan
) -> None

Assert that two MovePlan are equal

Parameters:

Name Type Description Default
res MovePlan

The result

required
exp MovePlan

The expectation

required

Raises:

Type Description
AssertionError

res and exp are not equal

Source code in src/pandas_openscm/testing.py
def assert_move_plan_equal(res: MovePlan, exp: MovePlan) -> None:
    """
    Check that two [MovePlan][(p).db.rewriting.] match

    The moved index, the moved file map, the rewrite actions
    and the paths to delete are compared in turn.

    Parameters
    ----------
    res
        Plan produced by the code under test

    exp
        Plan we expect to get

    Raises
    ------
    AssertionError
        `res` and `exp` are not equal
    """
    # `reset_index` turns the existing index into ordinary columns,
    # then the whole frame is turned into a MultiIndex
    # so the rows can be compared without regard to their order.
    res_moved = pd.MultiIndex.from_frame(res.moved_index.reset_index())
    exp_moved = pd.MultiIndex.from_frame(exp.moved_index.reset_index())
    pd.testing.assert_index_equal(res_moved, exp_moved, check_order=False)

    pd.testing.assert_series_equal(
        res.moved_file_map, exp.moved_file_map, check_like=True
    )

    if res.rewrite_actions is None:
        assert exp.rewrite_actions is None
    elif exp.rewrite_actions is None:
        msg = f"{exp.rewrite_actions=} while {res.rewrite_actions=}"
        raise AssertionError(msg)
    else:
        assert len(res.rewrite_actions) == len(exp.rewrite_actions)
        for res_rwa in res.rewrite_actions:
            # Actions are paired up via the file they read from;
            # the first expected action with the same source file is used.
            exp_rwa = next(
                (
                    candidate
                    for candidate in exp.rewrite_actions
                    if res_rwa.from_file == candidate.from_file
                ),
                None,
            )
            if exp_rwa is None:
                msg = f"Did not find pair for\n{res_rwa=}\nin\n{exp.rewrite_actions=}"
                raise AssertionError(msg)

            pd.testing.assert_index_equal(
                res_rwa.locator, exp_rwa.locator, check_order=False
            )
            assert res_rwa.to_file == exp_rwa.to_file

    if res.delete_paths is None:
        assert exp.delete_paths is None
    elif exp.delete_paths is None:
        msg = f"{exp.delete_paths=} while {res.delete_paths=}"
        raise AssertionError(msg)
    else:
        # Only membership matters for the paths to delete, not their order
        assert set(res.delete_paths) == set(exp.delete_paths)

changer #

changer(
    inv: float, factor: float, *, exponent: float = 1.0
) -> float

Change a value

This is just meant as a helper for our tests

Source code in src/pandas_openscm/testing.py
def changer(inv: float, factor: float, *, exponent: float = 1.0) -> float:
    """
    Change a value

    This is just meant as a helper for our tests

    Parameters
    ----------
    inv
        Input value

    factor
        Factor by which to multiply `inv`

    exponent
        Power to which the product of `inv` and `factor` is raised

    Returns
    -------
    :
        `(inv * factor) ** exponent`
    """
    # Use the caller-supplied factor rather than a fixed multiplier,
    # otherwise the `factor` argument has no effect on the result.
    return cast(float, (inv * factor) ** exponent)

check_result #

check_result(res: P, exp: P) -> None

Check result in the case where it could be multiple types

Specifically, pd.DataFrame or pd.Series.

This is a thin wrapper; if you want specific functionality, use the underlying function directly.

Parameters:

Name Type Description Default
res P

Result

required
exp P

Expected

required

Raises:

Type Description
TypeError

Type of res is not the same as the type of exp

Source code in src/pandas_openscm/testing.py
def check_result(res: P, exp: P) -> None:
    """
    Compare a result with its expectation when the type can vary

    Supported types are [pd.DataFrame][pandas.DataFrame]
    and [pd.Series][pandas.Series].

    This only dispatches to the matching comparison function.
    Call that function directly if you need to tune its behaviour.

    Parameters
    ----------
    res
        Result

    exp
        Expected

    Raises
    ------
    TypeError
        Type of `res` is not the same as the type of `exp`

    NotImplementedError
        `res` is neither a [pd.DataFrame][pandas.DataFrame]
        nor a [pd.Series][pandas.Series]
    """
    if isinstance(res, pd.DataFrame):
        if isinstance(exp, pd.DataFrame):
            assert_frame_alike(res, exp)
            return

        msg = f"{type(res)=} while {type(exp)=}"  # pragma: no cover
        raise TypeError(msg)  # pragma: no cover

    if isinstance(res, pd.Series):
        if isinstance(exp, pd.Series):
            pd.testing.assert_series_equal(res, exp)
            return

        msg = f"{type(res)=} while {type(exp)=}"  # pragma: no cover
        raise TypeError(msg)  # pragma: no cover

    raise NotImplementedError(type(res))  # pragma: no cover

convert_to_desired_type #

convert_to_desired_type(
    df: DataFrame, pobj_type: Literal["DataFrame"]
) -> DataFrame
convert_to_desired_type(
    df: DataFrame, pobj_type: Literal["Series"]
) -> Series[Any]
convert_to_desired_type(
    df: DataFrame, pobj_type: Literal["DataFrame", "Series"]
) -> DataFrame | Series[Any]

Convert a df to the desired type for testing

Parameters:

Name Type Description Default
df DataFrame

pd.DataFrame to convert

required
pobj_type Literal['DataFrame', 'Series']

Type to convert to

If "DataFrame", then df is simply returned. If "Series", then the first column of df is returned.

required

Returns:

Type Description
DataFrame | Series[Any]

df converted to the desired type

Source code in src/pandas_openscm/testing.py
def convert_to_desired_type(
    df: pd.DataFrame, pobj_type: Literal["DataFrame", "Series"]
) -> pd.DataFrame | pd.Series[Any]:
    """
    Turn `df` into the kind of pandas object a test asks for

    Parameters
    ----------
    df
        [pd.DataFrame][pandas.DataFrame] to convert

    pobj_type
        Type to convert to

        "DataFrame" gives back `df` itself, untouched.
        "Series" gives back the first column of `df`.

    Returns
    -------
    :
        `df` converted to the desired type
    """
    if pobj_type == "DataFrame":
        return df

    if not (pobj_type == "Series"):  # pragma: no cover
        raise NotImplementedError(pobj_type)

    # A single column selected by label comes back as a Series
    first_column_label = df.columns[0]
    return df[first_column_label]

create_test_df #

create_test_df(
    *,
    variables: Collection[tuple[str, str]],
    n_scenarios: int,
    n_runs: int,
    timepoints: NDArray[floating[Any]],
    rng: Generator | None = None,
) -> DataFrame

Create a pd.DataFrame to use in testing

This uses the idea of simple climate model runs, where you have a number of scenarios, each of which has a number of variables from a number of different model runs with output for a number of different time points.

The result will contain all combinations of scenarios, variables and runs, with the units being defined by each variable.

Parameters:

Name Type Description Default
variables Collection[tuple[str, str]]

Variables and their units to create

required
n_scenarios int

Number of scenarios to create.

Scenarios are simply named scenario_<i>, where i counts up from zero.

required
n_runs int

Number of runs to create.

These are simply numbered.

required
timepoints NDArray[floating[Any]]

Time points to use with the data.

required
rng Generator | None

Random number generator to use.

If not supplied, we create an instance using numpy.random.default_rng.

None

Returns:

Type Description
DataFrame

Generated test pd.DataFrame.

Source code in src/pandas_openscm/testing.py
def create_test_df(
    *,
    variables: Collection[tuple[str, str]],
    n_scenarios: int,
    n_runs: int,
    timepoints: np.typing.NDArray[np.floating[Any]],
    rng: np.random.Generator | None = None,
) -> pd.DataFrame:
    """
    Build a [pd.DataFrame][pandas.DataFrame] for use in tests

    The layout mimics output from simple climate model runs:
    several scenarios, each with several variables,
    each variable coming from several model runs,
    and every such timeseries having a value at each time point.

    Every combination of scenario, variable and run gets one row.
    The unit of a row is the unit given alongside its variable.

    Parameters
    ----------
    variables
        Variables and their units to create,
        given as (variable, unit) pairs

    n_scenarios
        Number of scenarios to create.

        Scenarios are named `scenario_<i>`, with `i` counting up from zero.

    n_runs
        Number of runs to create.

        Runs are labelled with integers counting up from zero.

    timepoints
        Time points to use with the data.

        These become the columns of the result.

    rng
        Random number generator to use.

        If not supplied, we create an instance using
        [numpy.random.default_rng](https://numpy.org/doc/stable/reference/random/generator.html#numpy.random.default_rng).

    Returns
    -------
    :
        Generated test [pd.DataFrame][pandas.DataFrame].
    """
    generator = np.random.default_rng() if rng is None else rng

    scenario_names = [f"scenario_{i}" for i in range(n_scenarios)]
    # One metadata row per (scenario, variable, run) combination;
    # the unit travels with its variable.
    index_rows = [
        (scenario, v_info[0], run, v_info[1])
        for scenario, v_info, run in itertools.product(
            scenario_names, variables, range(n_runs)
        )
    ]
    index_frame = pd.DataFrame(
        index_rows, columns=["scenario", "variable", "run", "unit"]
    )
    row_index = pd.MultiIndex.from_frame(index_frame)

    n_timeseries = n_scenarios * len(variables) * n_runs
    n_timepoints = timepoints.size

    # Give the data a bit of structure so it looks different when plotted:
    # a per-row scale times a ramp along the time axis, plus random noise.
    row_scale = 50.0 * np.linspace(0.3, 1, n_timeseries)[:, np.newaxis]
    time_ramp = np.linspace(0, 1, n_timepoints)[np.newaxis, :]
    noise = generator.random((n_timeseries, n_timepoints))
    values = row_scale * time_ramp + noise

    return pd.DataFrame(values, columns=timepoints, index=row_index)