diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 00000000..7d9d732d --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,253 @@ +# Architecture + +## 1. Directory Structure + +``` +chainladder-python/ +├── chainladder/ # Main package +│ ├── __init__.py # Public API, global options, sample data loader +│ │ +│ ├── core/ # Triangle data structure +│ │ ├── triangle.py # Triangle (the public-facing class) +│ │ ├── base.py # TriangleBase (assembles all mixins) +│ │ ├── common.py # Common (shared helpers used by Triangle and estimators) +│ │ ├── dunders.py # TriangleDunders (arithmetic, comparison operators) +│ │ ├── pandas.py # TrianglePandas, TriangleGroupBy (pandas-style API) +│ │ ├── slice.py # TriangleSlicer, Location, Ilocation, At, Iat, VirtualColumns +│ │ ├── display.py # TriangleDisplay (__repr__, _repr_html_) +│ │ ├── io.py # TriangleIO, EstimatorIO (pickle, JSON, spreadsheet I/O) +│ │ ├── typing.py # TriangleProtocol, type aliases (BackendArray, etc.) +│ │ └── tests/ +│ │ +│ ├── development/ # Development pattern estimators +│ │ ├── base.py # DevelopmentBase (shared fit/transform logic) +│ │ ├── development.py # Development (weighted LDF, volume/simple/regression) +│ │ ├── constant.py # DevelopmentConstant (user-supplied LDFs) +│ │ ├── incremental.py # IncrementalAdditive +│ │ ├── munich.py # MunichAdjustment (paid/incurred correlation) +│ │ ├── clark.py # ClarkLDF (growth-curve LDFs) +│ │ ├── glm.py # TweedieGLM +│ │ ├── barnzehn.py # BarnettZehnwirth (extends TweedieGLM) +│ │ ├── learning.py # DevelopmentML (sklearn regressor wrapper) +│ │ ├── outstanding.py # CaseOutstanding +│ │ └── tests/ +│ │ +│ ├── tails/ # Tail factor estimators +│ │ ├── base.py # TailBase (extends DevelopmentBase) +│ │ ├── constant.py # TailConstant +│ │ ├── curve.py # TailCurve (exponential/inverse-power extrapolation) +│ │ ├── bondy.py # TailBondy +│ │ ├── clark.py # TailClark +│ │ └── tests/ +│ │ +│ ├── methods/ # Reserve estimation methods +│ │ ├── base.py # 
MethodBase (fit/predict contract) +│ │ ├── chainladder.py # Chainladder +│ │ ├── mack.py # MackChainladder (extends Chainladder) +│ │ ├── benktander.py # Benktander +│ │ ├── bornferg.py # BornhuetterFerguson (extends Benktander) +│ │ ├── capecod.py # CapeCod (extends Benktander) +│ │ ├── expectedloss.py # ExpectedLoss (extends Benktander) +│ │ └── tests/ +│ │ +│ ├── adjustments/ # Pre-processing transformers +│ │ ├── berqsherm.py # BerquistSherman +│ │ ├── bootstrap.py # BootstrapODPSample (extends DevelopmentBase) +│ │ ├── parallelogram.py # ParallelogramOLF +│ │ ├── trend.py # Trend, TrendConstant +│ │ └── tests/ +│ │ +│ ├── workflow/ # Pipeline and ensemble utilities +│ │ ├── gridsearch.py # GridSearch, Pipeline +│ │ ├── voting.py # VotingChainladder +│ │ └── tests/ +│ │ +│ ├── utils/ # Internal utilities +│ │ ├── sparse.py # COO sparse array wrapper +│ │ ├── cupy.py # CuPy GPU array shim +│ │ ├── dask.py # Dask parallel shim +│ │ ├── utility_functions.py # num_to_nan, set_common_backend, etc. +│ │ ├── weighted_regression.py # WeightedRegression helper +│ │ ├── triangle_weight.py # Triangle weighting helpers +│ │ ├── data/ # Bundled sample datasets (CSV) +│ │ └── tests/ +│ │ +│ └── tests/ +│ └── test_public_api.py # Smoke-tests for the public API surface +│ +├── docs/ # JupyterBook/Sphinx documentation source +├── conftest.py # pytest fixtures (raa, clrd, qtr, …) +├── pyproject.toml # Development environment +└── pyrightconfig.json # Type checking configuration +``` + +## 2. Inheritance Diagrams + +### 2a. Triangle + +`Triangle` is assembled from a stack of single-responsibility mixins. Python resolves methods left-to-right across the MRO (method resolution order), so the order in `TriangleBase` determines which mixin wins on any name collision. 
+ +``` +object + └── ABC + └── Common core/common.py — backend switching, grain helpers, valuation utilities + └── TrianglePandas core/pandas.py — pandas-style API (to_frame, groupby, rename, …) + └── TriangleDunders core/dunders.py — arithmetic & comparison operators + └── TriangleSlicer core/slice.py — .loc / .iloc / .at / .iat / virtual columns + └── TriangleDisplay core/display.py — __repr__ / _repr_html_ + └── TriangleIO core/io.py — to_pickle / to_json / to_excel / … + │ + └── TriangleBase core/base.py — __init__, array allocation, grain/dim logic + │ (inherits all of the above as direct bases) + └── Triangle core/triangle.py — public class; thin layer, exposes full API +``` + +Supporting classes owned by the slice layer: + +``` +_LocBase + ├── Location (.loc accessor) + │ └── At (.at accessor) + └── Ilocation (.iloc accessor) + └── Iat (.iat accessor) + +VirtualColumns (lazy computed-column registry attached to Triangle) +TriangleGroupBy (returned by Triangle.groupby(); consumed by arithmetic operators) +``` + +These classes are related to `Triangle` by **composition**, not inheritance. `Triangle` does not extend `Location` or `Ilocation` — instead, each `Triangle` *instance* holds references to instances of these classes as its own attributes. `TriangleSlicer._set_slicers()` (called during `Triangle.__init__` and whenever the index or column shape changes) creates fresh instances and assigns them: + +```python +# core/slice.py +class TriangleSlicer: + def _set_slicers(self) -> None: + self.iloc = Ilocation(self) + self.loc = Location(self) + self.iat = Iat(self) + self.at = At(self) + self.virtual_columns = VirtualColumns(self, self.virtual_columns.columns) +``` + +Each accessor receives a reference back to the owning `Triangle` (`self`), which is how `triangle.iloc[0]` can read and slice the triangle's underlying arrays. 
The relationship is: + +``` +Triangle instance + ├── .iloc → Ilocation instance (wraps the same Triangle) + │ └── [0] calls back into Triangle to produce a sliced copy + ├── .loc → Location instance + ├── .iat → Iat instance + ├── .at → At instance + └── .virtual_columns → VirtualColumns instance +``` + +#### Type-hinting the mixin stack + +`core/typing.py` defines `TriangleProtocol`, a `typing.Protocol` that declares the interface every mixin assumes will be present on `self` - properties such as `shape`, `index`, `values`, `array_backend`, aggregation methods such as `sum`, and the indexer attributes `iloc`, `loc`, `at`, `iat`. + +**The problem with direct Protocol inheritance at runtime** + +A mixin that inherits from `TriangleProtocol` at runtime places the Protocol's stub descriptors into the MRO ahead of any concrete implementations provided by other mixins or `Triangle` itself. For example, if `TrianglePandas(TriangleProtocol)` were written literally, then `TriangleProtocol.set_backend` (a stub that returns `...`) would shadow `Common.set_backend` (the real implementation), causing `AttributeError` at runtime. The same applies to any Protocol stub that happens to match a name defined elsewhere in the mixin stack. + +**The adopted pattern: `TYPE_CHECKING` conditional base** + +Each mixin that needs `TriangleProtocol` for type-checking purposes declares its base class conditionally so that the Protocol is visible to Pyright/Pylance but is replaced by `object` at runtime: + +```python +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from chainladder.core.typing import TriangleProtocol + _MixinBase = TriangleProtocol +else: + _MixinBase = object + +class TriangleMixin(_MixinBase): + # Pyright sees TriangleProtocol as the base — self has .shape, .values, .sum, etc. + # At runtime the base is object — no Protocol stubs in the MRO. + ... +``` + +This is the pattern recommended by the Protocols page in the Python documentation. 
+See [Explicitly declaring implementation](https://typing.python.org/en/latest/spec/protocol.html#explicitly-declaring-implementation) in the typing specification. + +**Type-hinting methods that accept a Triangle-like object and return `Triangle`** + +Use `TriangleProtocol` for inputs and `Triangle` for the concrete return type. Import both under `TYPE_CHECKING` so that neither is imported at runtime: + +```python +from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from chainladder import Triangle + from chainladder.core.typing import TriangleProtocol + +def transform(X: TriangleProtocol) -> Triangle: + ... +``` + +- **Input typed as `TriangleProtocol`**: accepts any object that structurally satisfies the protocol (a real `Triangle`, a mock in tests, a future subclass) without requiring a concrete import. +- **Return typed as `Triangle`**: callers know they get the fully-featured concrete class with all mixin methods available, not just the minimal protocol surface. + +Both imports live inside `if TYPE_CHECKING:`, so they are never executed at runtime and cannot create circular-import cycles. The `from __future__ import annotations` at the top of the file makes all annotations lazy strings, which means the names are never resolved at import time, even though `TYPE_CHECKING` is `False` at runtime and the guarded imports are skipped. + +--- + +### 2b. Estimators + +All estimators follow the scikit-learn `BaseEstimator` / `TransformerMixin` / `fit` / `transform` / `predict` contract. 
+ +``` +sklearn.BaseEstimator + │ + ├── EstimatorIO core/io.py — to_pickle / to_json for fitted estimators + │ + └── DevelopmentBase development/base.py — shared LDF fitting helpers + │ + ├── Development development/development.py — weighted LDF (volume/simple/regression) + ├── DevelopmentConstant development/constant.py + ├── IncrementalAdditive development/incremental.py + ├── MunichAdjustment development/munich.py + ├── ClarkLDF development/clark.py + ├── DevelopmentML development/learning.py + ├── CaseOutstanding development/outstanding.py + ├── TweedieGLM development/glm.py + │ └── BarnettZehnwirth development/barnzehn.py + ├── BootstrapODPSample adjustments/bootstrap.py + │ + └── TailBase tails/base.py — appends tail column, extends DevelopmentBase + ├── TailConstant tails/constant.py + ├── TailCurve tails/curve.py + ├── TailBondy tails/bondy.py + └── TailClark tails/clark.py + + +sklearn.BaseEstimator + └── Common core/common.py + └── EstimatorIO core/io.py + └── MethodBase methods/base.py — fit/predict contract for reserve methods + │ + ├── Chainladder methods/chainladder.py + │ └── MackChainladder methods/mack.py + │ + └── Benktander methods/benktander.py + ├── BornhuetterFerguson methods/bornferg.py + ├── CapeCod methods/capecod.py + └── ExpectedLoss methods/expectedloss.py + + +sklearn.BaseEstimator + TransformerMixin + EstimatorIO (standalone transformers) + ├── BerquistSherman adjustments/berqsherm.py + ├── ParallelogramOLF adjustments/parallelogram.py + ├── Trend adjustments/trend.py + └── TrendConstant adjustments/trend.py + + +sklearn.BaseEstimator + └── GridSearch workflow/gridsearch.py + └── Pipeline workflow/gridsearch.py (extends sklearn Pipeline + EstimatorIO) + + +MethodBase + └── VotingChainladder workflow/voting.py (ensemble of MethodBase estimators) +``` diff --git a/chainladder/core/base.py b/chainladder/core/base.py index ce3aded7..24824577 100644 --- a/chainladder/core/base.py +++ b/chainladder/core/base.py @@ -35,6 +35,7 @@ ) if 
TYPE_CHECKING: + from chainladder import Triangle from pandas import ( DataFrame, Series @@ -546,7 +547,7 @@ def get_array_module( "Array backend is invalid or not properly set. Supported backends are: " + ', '.join([*modules]) ) from e - def _auto_sparse(self) -> None: + def _auto_sparse(self) -> Triangle: """ Auto sparsifies at 30Mb or more and 20% density or less. """ diff --git a/chainladder/core/pandas.py b/chainladder/core/pandas.py index f32b1405..dbb904b2 100644 --- a/chainladder/core/pandas.py +++ b/chainladder/core/pandas.py @@ -13,8 +13,11 @@ __dt64_dtype__, _warn_dask_parallel_deprecated, ) -from chainladder.core.typing import TriangleProtocol -from chainladder.utils.utility_functions import num_to_nan +from chainladder.utils.utility_functions import ( + concat, + num_to_nan +) + from typing import ( cast, TYPE_CHECKING @@ -29,7 +32,7 @@ if TYPE_CHECKING: from chainladder import Triangle from chainladder.utils.sparse import COO - from chainladder.core.typing import BackendArray + from chainladder.core.typing import BackendArray, TriangleProtocol from collections.abc import Callable from numpy import ndarray from pandas import ( @@ -45,6 +48,9 @@ Literal, Type ) + _TrianglePandasBase = TriangleProtocol +else: + _TrianglePandasBase = object @@ -71,14 +77,10 @@ def __getitem__(self, key): ) -class TrianglePandas: - # Stubs to supress type checker warnings. Refer to typing.TriangleProtocol for actual - # typing. Remove once linters improve. 
- if TYPE_CHECKING: - values: np.ndarray +class TrianglePandas(_TrianglePandasBase): def to_frame( - self: TriangleProtocol, + self, origin_as_datetime: bool = True, keepdims: bool = False, implicit_axis: bool = False, @@ -112,16 +114,16 @@ def to_frame( if keepdims: is_val_tri: bool = self.is_val_tri obj: Triangle = self.val_to_dev().set_backend("sparse") - obj.values = cast("COO", obj.values) - out: DataFrame = pd.DataFrame(obj.index.iloc[obj.values.coords[0]]) - out["columns"] = obj.columns[obj.values.coords[1]] + values: COO = cast("COO", obj.values) + out: DataFrame = pd.DataFrame(obj.index.iloc[values.coords[0]]) + out["columns"] = obj.columns[values.coords[1]] missing_cols: list = list(set(self.columns) - set(out['columns'])) if origin_as_datetime: - out["origin"] = obj.odims[obj.values.coords[2]] + out["origin"] = obj.odims[values.coords[2]] else: - out["origin"] = obj.origin[obj.values.coords[2]] - out["development"] = obj.ddims[obj.values.coords[3]] - out["values"] = obj.values.data + out["origin"] = obj.origin[values.coords[2]] + out["development"] = obj.ddims[values.coords[3]] + out["values"] = values.data out: DataFrame = pd.pivot_table( out, index=obj.key_labels + ["origin", "development"], columns="columns" ) @@ -280,7 +282,7 @@ def _get_axis(axis: Literal['index', 'columns', 'origin', 'development'] | int | "integer representation of the desired axis." ) - def dropna(self: TriangleProtocol) -> Triangle: + def dropna(self) -> Triangle: """ Method that removes origin/development vectors from edge of a triangle that are all missing values. Does not work on the interior @@ -539,7 +541,7 @@ def dropna(self: TriangleProtocol) -> Triangle: obj = self[(self.origin >= min_odim) & (self.origin <= max_odim)] return obj - def fillna(self: TriangleProtocol, value: int | float | ndarray, inplace: bool = False) -> Triangle: + def fillna(self, value: int | float | ndarray, inplace: bool = False) -> Triangle: """Fill nan with 'value' by axis. 
Parameters @@ -569,7 +571,7 @@ def fillna(self: TriangleProtocol, value: int | float | ndarray, inplace: bool = cast("TriangleProtocol", cast(object, new_obj)).fillna(value=value, inplace=True) return new_obj - def fillzero(self: TriangleProtocol, inplace: bool = False) -> Triangle: + def fillzero(self, inplace: bool = False) -> Triangle: """Fill nan with 0 by axis. separate function from fillna() because fillna(0) isn't working. Parameters @@ -595,7 +597,7 @@ def fillzero(self: TriangleProtocol, inplace: bool = False) -> Triangle: cast("TriangleProtocol", cast(object, new_obj)).fillzero(inplace=True) return new_obj - def drop(self, labels=None, axis=1): + def drop(self, labels: str | int | list | None = None, axis: int = 1) -> Triangle: """Drop specified labels from rows or columns. Remove rows or columns by specifying label names and corresponding axis, @@ -604,7 +606,7 @@ def drop(self, labels=None, axis=1): Parameters ----------- - label: single label or list-like + labels: str | int | list | None Index or column labels to drop. axis: {0 or ‘index’, 1 or ‘columns’}, default 1 @@ -620,29 +622,43 @@ def drop(self, labels=None, axis=1): if axis == 1: return self[[item for item in self.columns if item not in labels]] else: - raise NotImplementedError("drop only inpemented for column axis") + raise NotImplementedError("Triangle.drop() only implemented for column axis.") @property - def T(self): + def T(self) -> DataFrame: # noqa: N802 + """ + Converts the Triangle to a Pandas DataFrame and then transposes it. + + Returns + ------- + DataFrame + + """ return self.to_frame(origin_as_datetime=False).T - def groupby(self, by, axis=0, *args, **kwargs): - """Group Triangle by index values. If the triangle is convertable to a + def groupby(self, by: str | list, axis: Literal[0, 1, 2, 3] = 0) -> TriangleGroupBy: + """ + Group Triangle by index values. If the triangle is convertable to a DataFrame, then it defaults to pandas groupby functionality. 
Parameters ---------- - by: str or list + by: str | list The index to group by + axis: int + The axis the groupby applies to. + Returns ------- + TriangleGroupBy GroupBy object (pandas or Triangle) """ - return TriangleGroupBy(self, by, axis) + return TriangleGroupBy(cast("Triangle", cast(object, self)), by, axis) - def append(self, other): - """Append rows of other to the end of caller, returning a new object. + def append(self, other: Triangle) -> Triangle: + """ + Append rows of another Triangle to self, returning an updated Triangle. Parameters ---------- @@ -653,20 +669,19 @@ def append(self, other): ------- New Triangle with appended data. """ - from chainladder.utils.utility_functions import concat return concat((self, other), 0) def rename( - self: TriangleProtocol, + self, axis: Literal['index', 'columns', 'origin', 'development'] | int, value: list | str | dict - ): + ) -> Triangle: """Alter axes labels. Parameters ---------- - axis: str or int + axis: Literal['index', 'columns', 'origin', 'development'] | int A value of 0 <= axis <= 4 corresponding to axes 'index', 'columns', 'origin', 'development' respectively. Both the int and str representation can be used. @@ -679,7 +694,7 @@ def rename( Triangle with relabeled axis. """ - if type(value) is dict: + if isinstance(value, dict): if axis == "columns" or axis == 1: full_dict = dict(zip(self.columns.values,self.columns.values)) full_dict.update(value) @@ -704,9 +719,9 @@ def rename( "'columns', 'origin', or 'development', or an integer in the interval [0, 4] specifying the" " axis to be modified." ) - return self + return cast("Triangle", cast(object, self)) - def astype(self: TriangleProtocol, dtype, inplace=True): + def astype(self, dtype, inplace=True) -> Triangle: """Copy of the array, cast to a specified type. 
Parameters @@ -722,9 +737,9 @@ def astype(self: TriangleProtocol, dtype, inplace=True): """ obj = self.copy() if inplace is False else self obj.values = obj.values.astype(dtype) - return obj + return cast("Triangle", obj) - def head(self: TriangleProtocol, n: int=5): + def head(self, n: int = 5) -> Triangle: """Return the first ``n`` triangles along the index axis. Parameters @@ -738,7 +753,7 @@ def head(self: TriangleProtocol, n: int=5): """ return self.iloc[:n] - def tail(self: TriangleProtocol, n: int=5): + def tail(self, n: int = 5) -> Triangle: """Return the last ``n`` triangles along the index axis. Parameters @@ -752,16 +767,20 @@ def tail(self: TriangleProtocol, n: int=5): """ return self.iloc[-n:] - def sort_index(self: TriangleProtocol, *args, **kwargs): + def sort_index(self, *args, **kwargs) -> Triangle: """Sort Triangle rows by index labels. Returns ------- Triangle """ - return self.iloc[self.index.sort_values(self.key_labels, *args, **kwargs).index] + sorted_index: DataFrame = cast( + "DataFrame", + self.index.sort_values(self.key_labels, *args, **kwargs) + ) + return self.iloc[sorted_index.index] - def exp(self: TriangleProtocol): + def exp(self) -> Triangle: """Return the exponential of each element. Returns @@ -770,7 +789,7 @@ def exp(self: TriangleProtocol): """ return self.get_array_module().exp(self) - def log(self: TriangleProtocol): + def log(self) -> Triangle: """Return the natural logarithm of each element. Returns @@ -779,21 +798,22 @@ def log(self: TriangleProtocol): """ return self.get_array_module().log(self) - def minimum(self: TriangleProtocol, other): + def minimum(self, other: Triangle | int | float) -> Triangle: """Element-wise minimum of this Triangle and another operand. + See :func:`chainladder.minimum` for parameters, usage, and examples. 
""" return self.get_array_module().minimum(self, other) - def maximum(self: TriangleProtocol, other): + def maximum(self, other: Triangle | int | float) -> Triangle: """Element-wise maximum of this Triangle and another operand. See :func:`chainladder.maximum` for parameters, usage, and examples. """ return self.get_array_module().maximum(self, other) - def sqrt(self: TriangleProtocol): + def sqrt(self) -> Triangle: """Return the non-negative square root of each element. Returns @@ -802,7 +822,7 @@ def sqrt(self: TriangleProtocol): """ return self.get_array_module().sqrt(self) - def round(self, decimals=0, *args, **kwargs): + def round(self, decimals: int = 0) -> Triangle: """Round each element to the given number of decimal places. Uses banker's rounding (round half to even). For example, @@ -818,20 +838,34 @@ def round(self, decimals=0, *args, **kwargs): ------- Triangle """ - return round(self, decimals) + return cast("Triangle", cast(object, self.__round__(decimals))) def xs( - self: TriangleProtocol, - index_key:IndexLabel, - level:IndexLabel | None = None, - drop_level:bool = True): - ''' + self, + index_key: IndexLabel, + level: IndexLabel | None = None, + drop_level: bool = True) -> Triangle: + """ Mimics xs from pandas. key difference is that this function only slices the index, therefore axis is always 0 and not an argument in the function Main use case for this function is when slicing beyond the first field in the index (such as LOB in the clrd dataset) - ''' + + Parameters + ---------- + index_key: IndexLabel + Label contained in the index. + level: IndexLabel | None = None + Level to take the cross-section on. + drop_level: bool = True + If False, returns object with same levels as self. + + Returns + ------- + Triangle + Cross-section from the original Triangle corresponding to the selected index levels. 
+ """ mi = pd.MultiIndex.from_frame(self.index) lvl = 0 if level is None else level @@ -846,7 +880,7 @@ def xs( new_ax_df = new_ax.to_frame(index=None)[new_ax.names] result.index = new_ax_df else: - result.index = pd.DataFrame(data=['Total'],columns=['Total']) + result.index = pd.DataFrame(data=['Total'], columns=pd.Index(['Total'])) return result def add_triangle_agg_func( @@ -1048,15 +1082,15 @@ def set_method( "pct_chg", ] ) -for item in df_passthru: - add_df_passthru(TrianglePandas, item) +for method in df_passthru: + add_df_passthru(TrianglePandas, method) agg_funcs = ["sum", "mean", "median", "max", "min", "prod", "var"] agg_funcs = agg_funcs + ["std", "cumsum", "quantile"] -for k in agg_funcs: - add_groupby_agg_func(TriangleGroupBy, k, k) +for func in agg_funcs: + add_groupby_agg_func(TriangleGroupBy, func, func) agg_funcs = {item: "nan" + item for item in agg_funcs} more_aggs = ["diff"] agg_funcs = {**agg_funcs, **{item: item for item in more_aggs}} -for k, v in agg_funcs.items(): - add_triangle_agg_func(TrianglePandas, k, v) +for method, func in agg_funcs.items(): + add_triangle_agg_func(TrianglePandas, method, func) diff --git a/chainladder/core/tests/test_arithmetic.py b/chainladder/core/tests/test_arithmetic.py index a16d16e3..714f1dd1 100644 --- a/chainladder/core/tests/test_arithmetic.py +++ b/chainladder/core/tests/test_arithmetic.py @@ -1,5 +1,6 @@ from __future__ import annotations import numpy as np +import pandas as pd import pytest from typing import TYPE_CHECKING @@ -75,6 +76,67 @@ def test_arithmetic_grain_mismatch_raises(raa: Triangle, qtr: Triangle) -> None: raa + qtr +def test_non_overlapping_odims(raa: Triangle) -> None: + """ + Union of same-shape triangles with non-overlapping origin rows. + + Parameters + ---------- + raa : Triangle + The raa sample data set Triangle. 
+ + Returns + ------- + None + """ + a = raa.iloc[..., :5, :] + b = raa.iloc[..., 5:, :] + result = a + b + assert result == raa + + +def test_arithmetic_union_val_tri(raa: Triangle) -> None: + """ + Union of non-overlapping valuation triangle slices preserves DatetimeIndex ddims. + + Parameters + ---------- + raa: Triangle + The raa sample data set fixture. + + Returns + ------- + None + """ + val_raa = raa.dev_to_val() + a = val_raa[val_raa.valuation < '1987'] + b = val_raa[val_raa.valuation >= '1987'] + result = a + b + assert isinstance(result.ddims, pd.DatetimeIndex) + assert result.shape == val_raa.shape + assert result == val_raa + + +def test_origin_broadcasting(raa: Triangle) -> None: + """ + Adding a single-origin triangle to a multi-origin triangle broadcasts odims. + + Parameters + ---------- + raa: Triangle + The raa sample data set fixture. + + Returns + ------- + None + """ + single_origin = raa.sum('origin') + single_origin['values'] = 500 + result = raa + single_origin + assert result.shape == raa.shape + assert result == raa + 500 + + def test_arithmetic_union(raa): assert raa.shape == (raa - raa[raa.valuation < "1987"]).shape assert raa[raa.valuation<'1986'] + raa[raa.valuation>='1986'] == raa @@ -90,13 +152,61 @@ def test_arithmetic_1(raa): assert 1 - (x / x) == 0 * x * 0 +def test_eq_non_triangle(raa: Triangle) -> None: + """ + Triangle compared to a non-Triangle returns False. + + Parameters + ---------- + raa: Triangle + The raa sample data set fixture. 
+ + Returns + ------- + None + """ + assert (raa == 42) is False + assert (raa == "foo") is False + assert (raa == None) is False + + +def test_pow_groupby(clrd: Triangle) -> None: + """__pow__ via TriangleGroupBy path when index keys differ between operands.""" + a = clrd["CumPaidLoss"] + result = a ** a.groupby("LOB").sum() + assert result.shape == a.shape + # x^0 == 1 for every computed cell: predictable value check without overflow + zeros_gb = (a * 0).groupby("LOB").sum() + result_exp_zero = (a ** zeros_gb).set_backend("numpy") + non_nan = result_exp_zero.values[~np.isnan(result_exp_zero.values)] + assert len(non_nan) > 0 + assert np.all(non_nan == 1.0) + + def test_rtruediv(raa): xp = raa.get_array_module() assert xp.nansum(abs(((1 / raa) * raa).values[0, 0] - raa.nan_triangle)) < 0.00001 -def test_vector_division(raa): - raa.latest_diagonal / raa +def test_vector_division(raa: Triangle) -> None: + """ + Divide latest diagonal by triangle. Each element in the resulting triangle should be equal to the latest + diagonal value in the corresponding origin period divided by the original value. 
+ + Parameters + ---------- + raa: Triangle + + Returns + ------- + None + """ + result = raa.latest_diagonal / raa + assert result.shape == raa.shape + for i in range(raa.shape[2]): + orig = raa.iloc[..., i:i+1, :] + ld = raa.latest_diagonal.iloc[..., i:i+1, :] + assert result.iloc[..., i:i+1, :] == ld / orig def test_multiindex_broadcast(clrd): @@ -141,7 +251,7 @@ def test_index_broadacsting4(clrd): idx['New Field'] = 'New' b.index = idx with pytest.raises(ValueError, match="Index broadcasting is ambiguous"): - a + b + _= a + b def test_index_broadcasting5(clrd): """ If a and b have shared key labels but no matching levels, then they will stack """ diff --git a/chainladder/core/tests/test_triangle.py b/chainladder/core/tests/test_triangle.py index b711fd4c..4e47728c 100644 --- a/chainladder/core/tests/test_triangle.py +++ b/chainladder/core/tests/test_triangle.py @@ -193,6 +193,29 @@ def test_base_minimum_exposure_triangle(raa): cl.Triangle(d, origin="index", columns=d.columns[-1]) +def test_development_before_origin_warns_and_drops() -> None: + """ + Rows where development precedes origin are invalid. Triangle.__init__ should + emit a UserWarning and silently drop those rows. + + Returns + ------- + None + """ + df = pd.DataFrame({ + "origin": [2000, 2000, 2001, 2001], + "development": [2001, 2002, 2000, 2002], # 2001/2000 row is invalid + "value": [100, 200, 999, 300], + }) + with pytest.warns(UserWarning, match="development before"): + tri = cl.Triangle( + df, origin="origin", development="development", + columns="value", cumulative=True, + ) + # The invalid row (value=999) must not appear in the triangle. 
+ assert 999 not in tri.to_frame().values + + def test_origin_and_value_setters(raa): raa2 = raa.copy() raa.columns = list(raa.columns) @@ -382,6 +405,192 @@ def test_groupby_agg_auto_sparse(prism: Triangle) -> None: assert result_default == result_no_sparse +def test_auto_sparse_disabled_returns_self(prism: Triangle) -> None: + """ + When cl.options.AUTO_SPARSE is False, _auto_sparse() returns the triangle + unchanged without switching backends. + + Parameters + ---------- + prism : Triangle + The prism sample data set Triangle. + + Returns + ------- + None + """ + dense = prism.set_backend("numpy") + cl.options.set_option("AUTO_SPARSE", False) + try: + result = dense._auto_sparse() + assert result is dense + assert result.array_backend == "numpy" + finally: + cl.options.reset_option("AUTO_SPARSE") + + +def test_auto_sparse_converts_numpy_to_sparse(prism: Triangle) -> None: + """ + _auto_sparse() should convert a numpy-backed triangle to sparse when it is + large enough (> 30Mb) and sparse enough (density <= 20%). + + Parameters + ---------- + prism: Triangle + The prism sample data set Triangle. + + Returns + ------- + None + """ + # Slice down to the fewest claims (66) whose dense (index, columns, + # origin, development) shape still clears the 30Mb/8-byte-float + # threshold in _auto_sparse(); the full prism triangle is ~2B cells and + # would need ~15GB as a dense numpy array. + small_prism = prism.iloc[:66] + dense = small_prism.set_backend("numpy") + assert dense.array_backend == "numpy" + + result = dense._auto_sparse() + + assert result is dense + assert result.array_backend == "sparse" + + +def test_subtriangles(raa: Triangle) -> None: + """ + subtriangles should list the attributes on a Triangle instance that are + themselves Triangle instances, e.g. the ldf_/sigma_/std_err_ triangles + attached by Development.fit_transform. A plain Triangle with no such + attributes should report an empty list. 
+ + Parameters + ---------- + raa : Triangle + The raa sample data set. + + Returns + ------- + None + """ + assert raa.subtriangles == [] + + fit = cl.Development().fit_transform(raa) + + assert set(fit.subtriangles) == { + "std_err_", "ldf_", "sigma_", "std_residuals_", "w_v2_" + } + + +def test_array_dunder(raa: Triangle) -> None: + """ + __array__ lets numpy treat a Triangle as array-like, e.g. via np.asarray() + or np.array(), returning the underlying values. + + Parameters + ---------- + raa : Triangle + The raa sample data set. + + Returns + ------- + None + """ + arr = np.asarray(raa) + + assert arr is raa.values + np.testing.assert_array_equal(np.array(raa), raa.values) + + +def test_triangle_from_dataframe_interchange_protocol() -> None: + """ + Triangle() should accept any object supporting the __dataframe__ + interchange protocol (e.g. a polars DataFrame), converting it to a + pandas DataFrame via _interchange_dataframe() under the hood. + + Returns + ------- + None + """ + polars = pytest.importorskip("polars") + + df = pd.DataFrame( + { + "origin": ["2020-01-01", "2020-01-01", "2021-01-01", "2021-01-01"], + "development": ["2020-12-31", "2021-12-31", "2021-12-31", "2022-12-31"], + "values": [100, 150, 120, 180], + } + ) + pl_df = polars.from_pandas(df) + assert hasattr(pl_df, "__dataframe__") + assert not isinstance(pl_df, pd.DataFrame) + + tri = cl.Triangle( + pl_df, + origin="origin", + development="development", + columns="values", + cumulative=True, + ) + expected = cl.Triangle( + df, + origin="origin", + development="development", + columns="values", + cumulative=True, + ) + + assert tri == expected + + +def test_array_function_unhandled_raises(raa: Triangle) -> None: + """ + __array_function__ should return NotImplemented for numpy functions that + are neither explicitly handled (e.g. np.concatenate, np.round) nor + aliases of a Triangle method of the same name (e.g. np.sum). numpy then + turns that NotImplemented into a TypeError. 
+ + Parameters + ---------- + raa : Triangle + The raa sample data set. + + Returns + ------- + None + """ + assert "stack" not in dir(raa) + + with pytest.raises(TypeError): + np.stack([raa, raa]) + + +def test_array_function_mixed_types_raises(raa: Triangle) -> None: + """ + __array_function__ should return NotImplemented when one of the + dispatching argument types is not a Triangle subclass, even for a + handled function like np.concatenate. numpy then turns that + NotImplemented into a TypeError. + + Parameters + ---------- + raa : Triangle + The raa sample data set. + + Returns + ------- + None + """ + + class NotATriangle: + @staticmethod + def __array_function__(_func, _types, _args, _kwargs): + return NotImplemented + + with pytest.raises(TypeError): + np.concatenate([raa, NotATriangle()]) + + def test_get_axis_none(clrd: Triangle) -> None: """ Pass axis=None to TriangleGroupBy. Should be the same as passing axis=0. diff --git a/chainladder/core/typing.py b/chainladder/core/typing.py index 162b2187..dff48190 100644 --- a/chainladder/core/typing.py +++ b/chainladder/core/typing.py @@ -58,21 +58,28 @@ def shape(self) -> tuple[int, int, int, int]: ... @property def index(self) -> pd.DataFrame: ... + @index.setter + def index(self, value: pd.DataFrame) -> None: ... @property def is_val_tri(self) -> bool: ... @property def columns(self) -> pd.Index: ... + @columns.setter + def columns(self, value: Any) -> None: ... @property def origin(self) -> pd.PeriodIndex: ... + @origin.setter + def origin(self, value: Any) -> None: ... @property def development(self) -> pd.Series: ... + @development.setter + def development(self, value: Any) -> None: ... - @property - def valuation_date(self) -> pd.Timestamp: ... + valuation_date: pd.Timestamp @property def nan_triangle(self) -> BackendArray: ... @@ -102,8 +109,9 @@ def to_frame( implicit_axis: bool = False, ) -> DataFrame | Series: ... def sum(self, axis: str | int | None = None, *args, **kwargs) -> TriangleProtocol: ... 
# -> Self once Python 3.10 is deprecated. - def fillna(self, value: int | float | ndarray | None = None, inplace: bool = False) -> TriangleProtocol: ... # -> Self once Python 3.10 is deprecated. + def fillna(self, value: int | float | ndarray, inplace: bool = False) -> TriangleProtocol: ... # -> Self once Python 3.10 is deprecated. def fillzero(self, inplace: bool = False) -> TriangleProtocol: ... # -> Self once Python 3.10 is deprecated. + def __round__(self, ndigits: int = 0) -> TriangleProtocol: ... # -> Self once Python 3.10 is deprecated. def __add__(self, other: Any) -> TriangleProtocol: ... # -> Self once Python 3.10 is deprecated. def __radd__(self, other: Any) -> TriangleProtocol: ... # -> Self once Python 3.10 is deprecated. def __mul__(self, other: Any) -> TriangleProtocol: ... # -> Self once Python 3.10 is deprecated. diff --git a/uv.lock b/uv.lock index 17c27a92..890c73bd 100644 --- a/uv.lock +++ b/uv.lock @@ -286,7 +286,7 @@ requires-dist = [ { name = "nbsphinx", marker = "extra == 'docs'" }, { name = "numpy", specifier = ">=2.0" }, { name = "numpydoc", marker = "extra == 'docs'" }, - { name = "pandas", specifier = ">=2.3.3" }, + { name = "pandas", specifier = ">=2.3.3,<=3.0.3" }, { name = "parso", marker = "extra == 'docs'", specifier = ">=0.8" }, { name = "patsy", specifier = ">=1.0.2" }, { name = "polars", marker = "extra == 'docs'" },