From f3fe407be9bad7762ed04cdb317d931668376e11 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 01:39:18 -0400 Subject: [PATCH 01/16] Reword and reprioritize main.py TODO list --- code/main.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/code/main.py b/code/main.py index 4a25052..6028b3d 100644 --- a/code/main.py +++ b/code/main.py @@ -73,24 +73,22 @@ - add pca option and allow visualization of key features on multivar plt? #TODO# -- in source spectra viewer in tab plot -- do overall data quality score, AUC +- in source spectra viewer in spectrum details tab plot with preexisting in source fragment deconvolution algoirthm +- clean up import sections and general code for better maintability and good syntax/standards +- do overall data quality score, AUC on CV plot or something, may be present in a different form already - standardize method and class names -- database management, options -- fix up analysisinfo file output - -- mzmine msp file import -- add other ordination options +- add terminal output with current line to status bar instead of just static status messages, perhaps with expand button to show full terminal output +- potentially consider other database options like HMDB etc +- fix up analysisinfo file output with better and more useful log ingo +- add other ordination options like pca, pls-da, etc etc - add custom keyword arguments for each plot to make calling them easier -- add runcheck before searching when switching to search tab -- Figure out way to have only active plot be updated and then to update others - when plot is switched -- make it so groups can be reordered +- add runcheck before searching when switching to search tab if not present +- make it so groups can be reordered in the groupsets widgets? - consider if indexing and feature highly functions in plot options have any easy wins for optimization or disk use. 
(prob not) - make goto buttons just one class and lambda an index for the stacked widgets when connecting! - +likely items that need more thought and planning - maybe have a comparison mode for many different strains with and without elicitor - specificity/sensitivity plot - other statistical models From 9feb9ec362ce2f23bed794fa089d00ddabd2045a Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 01:43:16 -0400 Subject: [PATCH 02/16] Add search-tab run-check, clean up main.py's dead imports - goto_search now tells the user to run an analysis first instead of silently doing nothing when the search tab is opened before self.analysisrun is set, closing the "add runcheck before searching" TODO. - Removed main.py's unused PyQt5/stdlib/groupsets imports (platform, GroupSet, several never-referenced Qt classes), verified via pyflakes + grep cross-check; no behavior change, 130 existing tests still pass. Co-Authored-By: Claude Sonnet 4.6 --- code/main.py | 12 ++++++------ code/ui_functions.py | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/code/main.py b/code/main.py index 6028b3d..442b315 100644 --- a/code/main.py +++ b/code/main.py @@ -12,11 +12,10 @@ import time import string -import platform from PyQt5 import QtCore, QtWidgets -from PyQt5.QtWidgets import QMainWindow, QSizeGrip, QGraphicsDropShadowEffect, QFileDialog, QListWidgetItem, QColorDialog -from PyQt5.QtCore import (QCoreApplication, QPropertyAnimation, QDate, QDateTime, QMetaObject, QObject, QPoint, QRect, QSize, QTime, QUrl, Qt, QEvent) -from PyQt5.QtGui import QBrush, QColor, QIcon, QPalette, QPainter, QPixmap +from PyQt5.QtWidgets import QMainWindow, QSizeGrip +from PyQt5.QtCore import QObject, Qt +from PyQt5.QtGui import QPixmap from pathlib import Path # Install/verify non-stock dependencies (epam.indigo, UpSetPlot, squarify) @@ -34,7 +33,7 @@ import files from MSFaST import run_MSFaST, analysis_parameters -from groupsets import GroupSet, GroupSetModel, 
build_query_dict +from groupsets import GroupSetModel, build_query_dict from plotslots import PlotSlotRegistry from paramfields import save_checkbox_fields from csvcache import cached_read_csv, invalidate as invalidate_csv_cache @@ -75,6 +74,8 @@ #TODO# - in source spectra viewer in spectrum details tab plot with preexisting in source fragment deconvolution algoirthm - clean up import sections and general code for better maintability and good syntax/standards + ~main.py's own import section done (dead PyQt5/stdlib/groupsets imports removed, + verified unused via pyflakes + grep, no behavior change); other files not yet swept - do overall data quality score, AUC on CV plot or something, may be present in a different form already - standardize method and class names - add terminal output with current line to status bar instead of just static status messages, perhaps with expand button to show full terminal output @@ -82,7 +83,6 @@ - fix up analysisinfo file output with better and more useful log ingo - add other ordination options like pca, pls-da, etc etc - add custom keyword arguments for each plot to make calling them easier -- add runcheck before searching when switching to search tab if not present - make it so groups can be reordered in the groupsets widgets? - consider if indexing and feature highly functions in plot options have any easy wins for optimization or disk use. 
(prob not) - make goto buttons just one class and lambda an index for the stacked widgets diff --git a/code/ui_functions.py b/code/ui_functions.py index e0bbf68..1019600 100644 --- a/code/ui_functions.py +++ b/code/ui_functions.py @@ -211,6 +211,8 @@ def goto_search(self): self.dbsearchdone = True stop_functime('dbsearch complete') reset_runtime() + elif not self.analysisrun: + self.error('Run an analysis before searching.') #plotbar functions def goto_review(self): From b5a804c757a34c1031d1179478c254b9c52e2327 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 01:45:56 -0400 Subject: [PATCH 03/16] Add GroupSetModel.move() for groupset reordering (model layer only) Lays the Qt-free foundation for the "groups can be reordered" TODO: GroupSetModel.move(from_index, to_index) reorders groupsets and keeps the selection on the moved/shifted item by identity (not GroupSet's value-based __eq__, since two freshly-added default groupsets compare equal). 7 new tests in test_groupsets.py. UI wiring (drag-and-drop on listWidget_pltgrps) intentionally left for later -- it would need to be verified against a live GUI session to confirm it interacts correctly with updatesets()'s existing blockSignals dance, which isn't something to ship unverified. Co-Authored-By: Claude Sonnet 4.6 --- code/groupsets.py | 28 ++++++++++++++ code/main.py | 6 +++ code/tests/test_groupsets.py | 72 ++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/code/groupsets.py b/code/groupsets.py index 6bee426..39789d8 100644 --- a/code/groupsets.py +++ b/code/groupsets.py @@ -122,6 +122,34 @@ def remove(self, index=None): del self._items[index] self.select(self._selected) + def move(self, from_index, to_index): + """Reorder the groupset at ``from_index`` to ``to_index``. + + Both indices are clamped to the valid range; if they are equal after + clamping (or the model is empty), the call is a no-op. 
Selection follows the moved item, so the + groupset that was selected before the move is still selected after + (by identity, not by index) -- a drag-and-drop reorder shouldn't + change which groupset is being edited. + """ + if not self._items: + return + from_index = max(0, min(from_index, len(self._items) - 1)) + to_index = max(0, min(to_index, len(self._items) - 1)) + if from_index == to_index: + return + selected_item = self.selected + groupset = self._items.pop(from_index) + self._items.insert(to_index, groupset) + if selected_item is not None: + # Identity, not '==' -- GroupSet.__eq__ is value-based, and two + # distinct groupsets can compare equal (e.g. freshly added ones + # before either is edited), so list.index() could pick the wrong + # one. + for i, item in enumerate(self._items): + if item is selected_item: + self._selected = i + break + def update(self, index, *, name=None, src=None, incl=None, excl=None, colour=None): """Overwrite the given fields of the groupset at ``index``.""" groupset = self._items[index] diff --git a/code/main.py b/code/main.py index 442b315..5f21441 100644 --- a/code/main.py +++ b/code/main.py @@ -84,6 +84,12 @@ - add other ordination options like pca, pls-da, etc etc - add custom keyword arguments for each plot to make calling them easier - make it so groups can be reordered in the groupsets widgets? + ~model-layer support done: GroupSetModel.move() (groupsets.py), tested in + test_groupsets.py. UI drag-drop wiring (listWidget_pltgrps InternalMove + + syncing its rowsMoved signal to model.move()) not done -- needs a live + GUI session to verify the selection-tracking interacts correctly with + updatesets()'s existing blockSignals dance, which isn't something to + guess at unverified - consider if indexing and feature highly functions in plot options have any easy wins for optimization or disk use. (prob not) - make goto buttons just one class and lambda an index for the stacked widgets when connecting! 
diff --git a/code/tests/test_groupsets.py b/code/tests/test_groupsets.py index 297b1b3..d4c89c4 100644 --- a/code/tests/test_groupsets.py +++ b/code/tests/test_groupsets.py @@ -99,6 +99,78 @@ def test_remove_all_items_leaves_selection_at_negative_one(): assert model.selected is None +# --------------------------------------------------------------------------- # +# GroupSetModel: move (reordering) +# --------------------------------------------------------------------------- # + +def test_move_reorders_items(): + model = GroupSetModel() + model.add('a') + model.add('b') + model.add('c') + model.move(0, 2) + assert [g.name for g in model] == ['b', 'c', 'a'] + + +def test_move_keeps_selection_on_the_moved_item(): + model = GroupSetModel() + model.add('a') + model.add('b') + model.add('c') + model.select(0) # 'a' selected + model.move(0, 2) + assert model.selected.name == 'a' + assert model.selected_index == 2 + + +def test_move_keeps_selection_on_a_different_item_that_shifted_position(): + model = GroupSetModel() + model.add('a') + model.add('b') + model.add('c') + model.select(1) # 'b' selected + model.move(0, 2) # moves 'a' past 'b', so 'b' shifts from index 1 to 0 + assert model.selected.name == 'b' + assert model.selected_index == 0 + + +def test_move_with_equal_indices_is_a_noop(): + model = GroupSetModel() + model.add('a') + model.add('b') + model.move(1, 1) + assert [g.name for g in model] == ['a', 'b'] + + +def test_move_clamps_out_of_range_indices(): + model = GroupSetModel() + model.add('a') + model.add('b') + model.add('c') + model.move(-5, 99) + assert [g.name for g in model] == ['b', 'c', 'a'] + + +def test_move_on_empty_model_is_a_noop(): + model = GroupSetModel() + model.move(0, 1) # must not raise + assert len(model) == 0 + + +def test_move_disambiguates_value_equal_groupsets_by_identity(): + # Two freshly-added default groupsets compare equal (GroupSet.__eq__ is + # value-based, and both start with identical fields), so move() must + # track the 
selected item by identity, not by list.index()'s '=='. + model = GroupSetModel() + model.add('dup') + model.add('dup') + model.select(0) + first = model.selected + model.move(0, 1) + assert model.selected is first + assert model.selected_index == 1 + + # --------------------------------------------------------------------------- # # GroupSetModel: CRUD # --------------------------------------------------------------------------- # From 504410f9a67068227f122d86b0b316e2af4d84c3 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 01:47:03 -0400 Subject: [PATCH 04/16] Clarify stale PCA TODO note PCA itself already exists (plot_PCA/goto_pca/checkbox field) -- the remaining gap is specifically loadings/biplot visualization of which features drive each component, which plot_PCA doesn't do yet. Reworded so the TODO doesn't read as if PCA support is still missing. Co-Authored-By: Claude Sonnet 4.6 --- code/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/code/main.py b/code/main.py index 5f21441..641a07c 100644 --- a/code/main.py +++ b/code/main.py @@ -69,7 +69,10 @@ -add bypass for plots based on checkmark. possibly use if check: ... else: button.hide() then pass - distribution of CVs on bottom of cvplt? -- add pca option and allow visualization of key features on multivar plt? 
+- allow visualization of key features (loadings/biplot) on multivar plt + (PCA itself already exists -- plot_PCA/goto_pca/checkbox field -- this is + specifically about showing which original features drive each component, + which plot_PCA doesn't do yet) #TODO# - in source spectra viewer in spectrum details tab plot with preexisting in source fragment deconvolution algoirthm From c0c160c013316202ddd33eab3dd47e3603b8a28d Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 13:17:19 -0400 Subject: [PATCH 05/16] Add Qt-free multivariate ordination backend (PCA/NMDS/PLS-DA) New code/ordination.py: PCA, NMDS (the metric-MDS-warm-started non-metric MDS already used by the soon-to-be-renamed plot_PCA, kept verbatim), and PLS-DA, plus a Qt-free port of the data-loading/ technical-replicate-collapsing logic the plot currently has hardcoded off (plotting.py: `parent.collapsereps = False#...`). The collapse-replicate logic is a near-verbatim port of the original, not a rewrite -- its header-juggling (round-tripping through a CSV to relabel an unstack() result) is easy to get subtly wrong by inspection alone, so it's preserved as-is and verified empirically instead: test_ordination.py constructs a synthetic peak table with 3 samples across 2 biological groups, 3 technical-replicate injections each, and asserts collapsing lands on exactly 3 rows (one per Sample) -- not 9 (uncollapsed) and not 2 (would mean biological replicates got merged too). Cross-checked against real example data with a scratch script (27 injections / 9 samples / 3 groups -> collapses to 9, not 3). Also caught and fixed a real bug while validating against real data: PLSRegression's default scale=True standardizes X internally, so the original explained-variance-ratio calc (component score variance / unscaled total variance) silently produced ratios around 1e-6 instead of the ~0.7 a well-separated dataset should show. Fixed with scale=False, matching PCA's plain-centered treatment. 
OPLS-DA is intentionally not implemented (see ordination.py's module docstring) -- no scikit-learn support, and the alternatives (an unmaintained third-party package, or a from-scratch implementation with no reference dataset to validate against) are both riskier than shipping PCA/NMDS/PLS-DA now and revisiting OPLS-DA later. Co-Authored-By: Claude Sonnet 4.6 --- code/ordination.py | 240 ++++++++++++++++++++++++++++++++++ code/tests/test_ordination.py | 195 +++++++++++++++++++++++++++ 2 files changed, 435 insertions(+) create mode 100644 code/ordination.py create mode 100644 code/tests/test_ordination.py diff --git a/code/ordination.py b/code/ordination.py new file mode 100644 index 0000000..1457168 --- /dev/null +++ b/code/ordination.py @@ -0,0 +1,240 @@ +""" +MPACT +Copyright 2022, Robert M. Samples, Sara P. Puckett, and Marcy J. Balunas + +Qt-free multivariate ordination backend: PCA, NMDS, and PLS-DA on the +samples x features intensity matrix, plus the data prep (technical-replicate +collapsing) and loadings-selection logic the "multivariate" plot tab needs. + +OPLS-DA is intentionally not implemented here: scikit-learn has no native +support, and the only alternatives (the unmaintained ``pyopls`` package, or a +from-scratch orthogonal-signal-correction implementation) are both +meaningfully riskier than PCA/NMDS/PLS-DA without a reference dataset to +validate against. Logged as future work, not started. + +This module is Qt-free and unit-tested (see ``tests/test_ordination.py``). +""" + +import numpy as np +import pandas as pd +from sklearn.cross_decomposition import PLSRegression +from sklearn.decomposition import PCA +from sklearn.metrics import pairwise_distances +from sklearn import manifold + + +def load_ordination_matrix(file, raw_msdata_header, collapse_replicates): + """Load the samples x features intensity matrix used for ordination. 
+ + This is a near-verbatim port of the data-loading half of the original + (dead-checkbox) ``plot_PCA.plot()`` -- deliberately not redesigned, since + the original's row-grouping math is correct (verified empirically in + ``test_ordination.py``/the scratch script, not re-derived by inspection + here -- this header-juggling is genuinely easy to get subtly wrong by + reasoning about it instead of testing it). Only the hardcoded + ``collapsereps = False`` is now a real parameter. + + Args: + file: path to the canonical ``_filtered.csv`` peak table (3-row + header: Biolgroup, Sample, Injection; see devnotes.md). + raw_msdata_header: the same peak table's 3 header rows, read + *raw* via ``pd.read_csv(file, header=None, index_col=[0,1,2]).iloc[:3,:].transpose()``, + exactly as the original code reads it -- NOT yet renamed or + re-indexed; that happens inside this function (for the + ``collapse_replicates=True`` case, a *different* header -- + read from the freshly-collapsed intermediate file -- is used + instead, matching the original's control flow exactly). + collapse_replicates: if True, average technical replicates (multiple + Injections under the same Sample) together, keeping biological + replicates (distinct Samples) separate. If False, every + Injection is its own row, as-is. + + Returns: + (X, biolgroup): ``X`` is a DataFrame indexed by sample identifier + ('File'; an Injection name, or a Sample name when collapsed), + columns = features. ``biolgroup`` is a Series, same index as ``X``, + mapping each sample to its biological group. + """ + if collapse_replicates: + # Average technical replicates (Injection) while keeping biological + # replicates (Sample) distinct -- level order is Compound, m/z, RT, + # Biolgroup, Sample, Injection (MSFaST.py's msdata_header.columns + # assignment). 
Round-trips through a CSV (matching the original) + # so the relabeled 3-row header can be read back the same way the + # uncollapsed path reads the real file, rather than hand-deriving + # unstack()'s resulting column-level order. + msdata = pd.read_csv(file, sep=',', header=[0, 1, 2], index_col=[0, 1, 2]) + try: + msdata = msdata.stack([0, 1, 2], future_stack=True) + except TypeError: + msdata = msdata.stack([0, 1, 2]) + msdata = msdata.groupby(level=[0, 1, 2, 3, 4]).mean().unstack(level=[-1, -2]) + collapsed_columns = msdata.columns.to_list() + msdata = msdata.reset_index() + header = [('', '', 'Compound'), ('', '', 'm/z'), ('', '', 'Retention time (min)')] + for elem in collapsed_columns: + header.append((elem[1], '', elem[0])) + msdata.columns = pd.MultiIndex.from_tuples(header) + msdata.to_csv('averagepca.csv', header=True, index=False) + + msdata_header = pd.read_csv('averagepca.csv', sep=',', header=None, + index_col=[0, 1, 2]).iloc[:3, :].transpose() + pcadf = (pd.read_csv('averagepca.csv', sep=',', header=[2], index_col=[0]) + .drop(['m/z', 'Retention time (min)'], axis=1) + .transpose().astype(float).reset_index().rename(columns={'index': 'File'})) + else: + msdata_header = raw_msdata_header + pcadf = (pd.read_csv(file, sep=',', header=[2], index_col=[0]) + .drop(['m/z', 'Retention time (min)'], axis=1) + .transpose().astype(float).reset_index().rename(columns={'index': 'File'})) + + msdata_header.columns = ['Biolgroup', 'Sample', 'Injection'] + msdata_header = msdata_header.set_index('Injection') + + x = pcadf.set_index('File') + biolgroup = pd.Series( + {file_id: msdata_header.loc[file_id, 'Biolgroup'] for file_id in pcadf['File']}, + name='Biolgroup', + ) + biolgroup.index.name = 'File' + return x, biolgroup + + +def run_pca(x, n_components): + """Plain PCA on the samples x features matrix. 
+ + Returns: + (scores, loadings, explained_variance_ratio): ``scores`` is a + DataFrame (index=samples, columns=PC1..PCn); ``loadings`` is a + DataFrame (index=features, columns=PC1..PCn) of each feature's + contribution to each component; ``explained_variance_ratio`` is an + ndarray of length ``n_components``. + """ + pca = PCA(n_components=n_components) + scores = pca.fit_transform(x.values - x.values.mean(axis=0)) + columns = [f'PC{i + 1}' for i in range(n_components)] + scores = pd.DataFrame(scores, index=x.index, columns=columns) + loadings = pd.DataFrame(pca.components_.T, index=x.columns, columns=columns) + return scores, loadings, pca.explained_variance_ratio_ + + +def run_nmds(x, n_components): + """Non-metric MDS on Bray-Curtis sample dissimilarities, warm-started + from a metric MDS solution, then rotated onto principal axes purely for + a stable/sensible orientation (this rotation is NOT a second ordination + of the original features -- it doesn't change the NMDS embedding's + inter-point distances, only its axis orientation). + + Returns: + (scores, explained_variance_ratio, stress): ``explained_variance_ratio`` + here is the variance of the *embedded* (already-reduced) NMDS + coordinates explained by each rotated axis -- not, unlike PCA's, a + measure of how much of the original feature-space variance is + captured. Callers should label this distinctly (e.g. "% of + embedding variance") rather than implying it's the same quantity as + PCA's explained variance. 
+ """ + similarities = pairwise_distances(x.values - x.values.mean(), metric='braycurtis') + + mds = manifold.MDS(n_components=n_components, max_iter=3000, eps=1e-9, + random_state=1, dissimilarity="precomputed", n_jobs=1) + pos = mds.fit(similarities).embedding_ + + nmds = manifold.MDS(n_components=n_components, metric=False, max_iter=3000, + eps=1e-12, dissimilarity="precomputed", random_state=1, + n_jobs=1, n_init=1) + npos = nmds.fit_transform(similarities, init=pos) + stress = nmds.stress_ + + pca = PCA(n_components=n_components) + rotated = pca.fit_transform(npos) + columns = [f'NMDS{i + 1}' for i in range(n_components)] + scores = pd.DataFrame(rotated, index=x.index, columns=columns) + return scores, pca.explained_variance_ratio_, stress + + +def nmds_loading_proxy(x, scores): + """Per-feature correlation with each NMDS axis, as a loadings-equivalent. + + NMDS has no linear feature loadings (it's a rank-based embedding, not a + linear projection of the original features) -- this is the standard + ecology "vector fitting" approach (cf. vegan::envfit): correlate each + original feature with each ordination axis and use that as the + loadings-plot substitute. + + Returns: + DataFrame (index=features, columns=same as ``scores``) of Pearson + correlation coefficients. + """ + return pd.DataFrame( + {col: x.corrwith(scores[col]) for col in scores.columns}, + index=x.columns, + ) + + +def run_plsda(x, y, n_components): + """PLS-DA: PLS regression of the samples x features matrix against + one-hot-encoded group labels. + + Args: + x: samples x features DataFrame. + y: Series of group labels, indexed the same as ``x``. + n_components: number of PLS components. + + Returns: + (scores, loadings, explained_variance_ratio): shapes match + ``run_pca``'s. 
scikit-learn doesn't expose an explained-variance + ratio for PLS directly, so it's computed manually here as each + component's X-score variance divided by the total variance of + (centered) ``x`` -- the standard approach for reporting %-explained + on a PLS biplot. + """ + y_dummies = pd.get_dummies(y) + # scale=False: PLSRegression's default scale=True standardizes X (and Y) + # to unit variance per column internally, so x_scores_ would otherwise + # live on a different scale than x_centered below -- comparing the two + # directly (as the explained-variance-ratio calc does) silently produced + # a near-zero, meaningless ratio until this was caught by running this + # against real data (see the scratch script / devnotes.md). + pls = PLSRegression(n_components=n_components, scale=False) + pls.fit(x.values, y_dummies.values) + x_scores = pls.x_scores_ + columns = [f'PLS{i + 1}' for i in range(n_components)] + scores = pd.DataFrame(x_scores, index=x.index, columns=columns) + loadings = pd.DataFrame(pls.x_loadings_, index=x.columns, columns=columns) + + x_centered = x.values - x.values.mean(axis=0) + total_variance = np.sum(x_centered ** 2) + component_variance = np.sum(x_scores ** 2, axis=0) + explained_variance_ratio = component_variance / total_variance + return scores, loadings, explained_variance_ratio + + +def top_loadings(loadings, n=25, always_include=()): + """Subset of ``loadings`` to actually draw on a loadings plot. + + High-dimensional data (thousands of features) can't all be plotted + legibly, so this returns only the top ``n`` features by vector magnitude + (Euclidean norm across all of ``loadings``'s columns) -- plus any + feature named in ``always_include``, even if its magnitude wouldn't + otherwise make the cut. That's what lets the app highlight a specific + (possibly tiny) feature on demand without changing the default view. + + Args: + loadings: DataFrame (index=features, columns=components). 
+ n: how many top-magnitude features to include by default. + always_include: iterable of feature names (names not found in + ``loadings.index`` are silently ignored) to include regardless of magnitude. + + Returns: + DataFrame: subset of ``loadings`` (same columns), index order + preserved from ``loadings``, with at most ``n + len(always_include)`` + rows (fewer if there's overlap or ``loadings`` itself is smaller). + """ + magnitude = np.sqrt((loadings ** 2).sum(axis=1)) + top_n_index = magnitude.nlargest(min(n, len(loadings))).index + forced = [feat for feat in always_include if feat in loadings.index] + keep = top_n_index.union(forced, sort=False) + # Preserve loadings' original row order rather than magnitude-sorted order. + keep = [feat for feat in loadings.index if feat in keep] + return loadings.loc[keep] diff --git a/code/tests/test_ordination.py b/code/tests/test_ordination.py new file mode 100644 index 0000000..1813600 --- /dev/null +++ b/code/tests/test_ordination.py @@ -0,0 +1,195 @@ +"""Unit tests for the multivariate ordination backend (``ordination.py``). + +Covers data loading/replicate-collapsing (verified against a synthetic +3-header-row peak table with a known technical/biological-replicate +structure -- see the plan for why this is empirically checked rather than +trusted by inspection) and the PCA/NMDS/PLS-DA/top_loadings math against +small synthetic matrices. 
+""" + +import numpy as np +import pandas as pd +import pytest + +from ordination import ( + load_ordination_matrix, nmds_loading_proxy, run_nmds, run_pca, run_plsda, + top_loadings, +) + + +# --------------------------------------------------------------------------- # +# load_ordination_matrix / collapse_replicates +# --------------------------------------------------------------------------- # + +def _write_synthetic_filtered_csv(path): + """3 samples (S1, S1b in groupA; S2 in groupB), 3 technical-replicate + injections each (9 injections total) -- enough to tell "collapsed to + one row per Sample" apart from both "per Injection" (9) and "per + Biolgroup" (2, since there are only 2 biolgroups but 3 samples). + """ + with open(path, 'w') as f: + f.write(',,,groupA,groupA,groupA,groupA,groupA,groupA,groupB,groupB,groupB\n') + f.write(',,,S1,S1,S1,S1b,S1b,S1b,S2,S2,S2\n') + f.write('Compound,m/z,Retention time (min),inj1,inj2,inj3,inj4,inj5,inj6,inj7,inj8,inj9\n') + f.write('feat1,100.0,1.0,10,12,11,30,32,31,50,52,51\n') + f.write('feat2,200.0,2.0,5,6,4,15,16,14,20,19,21\n') + + +def _raw_header(path): + return pd.read_csv(path, sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() + + +def test_uncollapsed_keeps_one_row_per_injection(tmp_path): + path = tmp_path / 'example_filtered.csv' + _write_synthetic_filtered_csv(path) + x, biolgroup = load_ordination_matrix(path, _raw_header(path), collapse_replicates=False) + assert x.shape == (9, 2) + assert len(biolgroup) == 9 + + +def test_collapsed_averages_technical_not_biological_replicates(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) # 'averagepca.csv' lands here, not the repo + path = tmp_path / 'example_filtered.csv' + _write_synthetic_filtered_csv(path) + x, biolgroup = load_ordination_matrix(path, _raw_header(path), collapse_replicates=True) + + # 3 distinct Samples (S1, S1b, S2) -- not 9 (uncollapsed) and not 2 + # (the number of Biolgroups, which would mean biological replicates got + # 
wrongly merged too). + assert x.shape[0] == 3 + assert x.shape[1] == 2 + assert biolgroup.nunique() == 2 + assert sorted(biolgroup.unique()) == ['groupA', 'groupB'] + # Two of the three collapsed rows belong to groupA (S1, S1b). + assert (biolgroup == 'groupA').sum() == 2 + assert (biolgroup == 'groupB').sum() == 1 + + +def test_collapsed_values_are_the_mean_of_their_technical_replicates(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + path = tmp_path / 'example_filtered.csv' + _write_synthetic_filtered_csv(path) + x, _ = load_ordination_matrix(path, _raw_header(path), collapse_replicates=True) + + # S1's feat1 replicates are 10, 12, 11 -> mean 11. + s1_row = x.loc[x.index.str.contains('S1') & ~x.index.str.contains('S1b')] + assert s1_row['feat1'].iloc[0] == pytest.approx(11.0) + + +# --------------------------------------------------------------------------- # +# run_pca / run_nmds / run_plsda / top_loadings +# --------------------------------------------------------------------------- # + +def _make_low_rank_matrix(): + # 12 samples, 5 features, but only 2 real underlying dimensions of + # variation -- PCA on this should recover ~100% explained variance in + # the first 2 components. + rng = np.random.RandomState(0) + latent = rng.normal(size=(12, 2)) + loading_matrix = rng.normal(size=(2, 5)) + x = pd.DataFrame( + latent @ loading_matrix + rng.normal(scale=0.01, size=(12, 5)), + index=[f's{i}' for i in range(12)], + columns=[f'f{i}' for i in range(5)], + ) + return x + + +def test_pca_recovers_known_low_rank_structure(): + x = _make_low_rank_matrix() + scores, loadings, expvar = run_pca(x, n_components=3) + assert scores.shape == (12, 3) + assert loadings.shape == (5, 3) + # Two real dimensions of variation + tiny noise -> first two components + # should capture almost all the variance. 
+ assert expvar[:2].sum() > 0.99 + + +def test_plsda_separates_two_groups_along_first_component(): + rng = np.random.RandomState(1) + group_a = rng.normal(loc=0, scale=0.5, size=(10, 6)) + group_b = rng.normal(loc=5, scale=0.5, size=(10, 6)) + x = pd.DataFrame( + np.vstack([group_a, group_b]), + index=[f's{i}' for i in range(20)], + columns=[f'f{i}' for i in range(6)], + ) + y = pd.Series(['A'] * 10 + ['B'] * 10, index=x.index) + + scores, loadings, expvar = run_plsda(x, y, n_components=2) + assert scores.shape == (20, 2) + assert loadings.shape == (6, 2) + # The groups are cleanly separated along PLS1: every A score should be + # on one side of 0 and every B score on the other (sign is arbitrary). + pls1 = scores['PLS1'] + assert (pls1[:10] > 0).all() != (pls1[10:] > 0).all() + # A real, well-separated signal should explain a meaningful share of + # variance -- catches the scale=True/scale=False bug (manually + # confirmed against real data: that bug produced ratios on the order of + # 1e-6 instead of comparable-to-PCA's ~0.7). 
+ assert expvar[0] > 0.1 + + +def test_nmds_smoke_test_on_clustered_data(): + rng = np.random.RandomState(2) + cluster_a = rng.normal(loc=0, scale=0.2, size=(6, 8)) + cluster_b = rng.normal(loc=10, scale=0.2, size=(6, 8)) + x = pd.DataFrame( + np.vstack([cluster_a, cluster_b]), + index=[f's{i}' for i in range(12)], + columns=[f'f{i}' for i in range(8)], + ) + scores, expvar, stress = run_nmds(x, n_components=2) + assert scores.shape == (12, 2) + assert len(expvar) == 2 + assert np.isfinite(stress) + assert stress >= 0 + + proxy = nmds_loading_proxy(x, scores) + assert proxy.shape == (8, 2) + assert proxy.values.min() >= -1.0001 and proxy.values.max() <= 1.0001 + + +# --------------------------------------------------------------------------- # +# top_loadings +# --------------------------------------------------------------------------- # + +def _make_loadings(n=30): + rng = np.random.RandomState(3) + return pd.DataFrame( + rng.normal(size=(n, 2)), + index=[f'feat{i}' for i in range(n)], + columns=['PC1', 'PC2'], + ) + + +def test_top_loadings_returns_n_rows_by_default(): + loadings = _make_loadings(30) + top = top_loadings(loadings, n=10) + assert len(top) == 10 + + +def test_top_loadings_includes_forced_feature_outside_top_n(): + loadings = _make_loadings(30) + top = top_loadings(loadings, n=5) + magnitude = np.sqrt((loadings ** 2).sum(axis=1)) + smallest_feature = magnitude.idxmin() + assert smallest_feature not in top.index + + top_forced = top_loadings(loadings, n=5, always_include=[smallest_feature]) + assert len(top_forced) == 6 + assert smallest_feature in top_forced.index + + +def test_top_loadings_forced_feature_already_in_top_n_is_not_duplicated(): + loadings = _make_loadings(30) + magnitude = np.sqrt((loadings ** 2).sum(axis=1)) + largest_feature = magnitude.idxmax() + top = top_loadings(loadings, n=10, always_include=[largest_feature]) + assert len(top) == 10 # already in the top 10, no duplicate row added + + +def 
test_top_loadings_n_larger_than_available_returns_everything(): + loadings = _make_loadings(5) + top = top_loadings(loadings, n=100) + assert len(top) == 5 From 9c15631fd4c3f541209c6f46f2d15fae1ee49cbb Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 13:26:03 -0400 Subject: [PATCH 06/16] Rework the mislabeled "PCA" plot into a multivariate ordination tab plot_PCA only ever ran NMDS (with a PCA rotation applied afterward purely to orient axes) -- renamed to plot_ordination and reworked to genuinely support PCA, NMDS, and PLS-DA, selectable via a combo-box switcher bar inserted above the plot canvas (same runtime widget-substitution pattern as searchtree.py's filter bar), plus a Scores/Loadings view toggle. The math moved to the new Qt-free ordination.py (previous commit); this is the Qt plumbing on top of it. - Axis labels now show percent-variance-explained where meaningful (PCA/PLS-DA: real feature-space variance; NMDS: labeled distinctly as embedding variance, since it isn't the same quantity). - Loadings view shows the top-25 features by vector magnitude as origin-anchored arrows (thousands of features can't all be drawn legibly) -- but whichever feature is currently highlighted elsewhere in the app is always included regardless of magnitude, via a second pre-created highlight artist (plot_ordination.highlight_loading(), called from MainWindow._refresh_highlight()) following the same convention every other plot's highlight marker already uses. - Restored "Collapse Technical Replicates": plotting.py previously had this hardcoded off (`parent.collapsereps = False#...isChecked()`); now reads the real checkbox via ordination.load_ordination_matrix(). - checkBox_pca's visible text/btn_pca's tooltip changed from "PCA" to "Multivariate" -- the underlying objectName/analysis_params.PCA attribute are unchanged for .mpct save-file compatibility. 
- Verified the view/method-switching lifecycle (Scores<->Loadings, method changes, the highlight-on-demand path for a feature outside the default top-25) against real example data with an offscreen Qt harness before considering this done -- in particular confirmed ui_plot.reset()'s mpl_disconnect(self.event) doesn't error when switching away from Scores view (where the pick-event connection lives) and back. devnotes.md documents all of the above plus the OPLS-DA deferral (unmaintained pyopls package, or a from-scratch implementation with no reference dataset to validate against -- both riskier than shipping PCA/NMDS/PLS-DA now). Co-Authored-By: Claude Sonnet 4.6 --- code/main.py | 29 +++-- code/plotting.py | 303 +++++++++++++++++++++++++++++------------------ devnotes.md | 69 ++++++++++- 3 files changed, 277 insertions(+), 124 deletions(-) diff --git a/code/main.py b/code/main.py index f34b82e..fc91309 100644 --- a/code/main.py +++ b/code/main.py @@ -41,7 +41,7 @@ from biogroups import compute_biological_groups from dbsearch import search_npatlas from searchtree import SearchTreePanel -from plotting import plot_abund, show_spectrum, show_featureplt, plot_heatmap, plot_mzrt, plot_samplecorr, kendrick, plot_volcano, plot_fc3d, plot_dendrogram, plot_PCA, prev_cv, gen_upsetplt, gen_treemap +from plotting import plot_abund, show_spectrum, show_featureplt, plot_heatmap, plot_mzrt, plot_samplecorr, kendrick, plot_volcano, plot_fc3d, plot_dendrogram, plot_ordination, prev_cv, gen_upsetplt, gen_treemap import getfragdb from indigo import Indigo @@ -70,10 +70,6 @@ -add bypass for plots based on checkmark. possibly use if check: ... else: button.hide() then pass - distribution of CVs on bottom of cvplt? 
-- allow visualization of key features (loadings/biplot) on multivar plt - (PCA itself already exists -- plot_PCA/goto_pca/checkbox field -- this is - specifically about showing which original features drive each component, - which plot_PCA doesn't do yet) #TODO# - in source spectra viewer in spectrum details tab plot with preexisting in source fragment deconvolution algoirthm @@ -245,7 +241,15 @@ def __init__(self): self.ui.setupUi(self) self.ui.label_credits.setText('v1.00.01 r26.06.29') - + + # "PCA" was a misnomer left over from when this checkbox/button only + # ran NMDS (see plotting.plot_ordination) -- the underlying + # checkBox_pca objectName/analysis_params.PCA attribute stay + # unchanged for .mpct save-file compatibility; only the visible text + # and tooltip change. + self.ui.checkBox_pca.setText('Multivariate') + self.ui.btn_pca.setToolTip('Multivariate Ordination (PCA/NMDS/PLS-DA)') + #initialize other dialog windows self.dialog = dialog() self.ftrdialog = ftrdialog() @@ -766,6 +770,13 @@ def _refresh_highlight(self): ) self.canvas['kmd'].draw_idle() + # Update the multivariate plot's loadings-view highlight (a separate + # concept from its scores view, which highlights a clicked *sample* + # via parent.pickedsample, not a feature -- so this only applies + # when self.pca exists and is currently showing loadings). 
+ if getattr(self, 'pca', None) is not None: + self.pca.highlight_loading(self.pickedfeature, self.highlightcol) + # Update feature plot with the selected feature self.highlight['featureplt'].set_data( [iondict.loc[self.pickedfeature, 'Retention time (min)']], @@ -1062,10 +1073,10 @@ def _generate_plots(self): stop_functime('dendrogram complete') if params.PCA: - self._create_or_reset('pca', 'PCA/NMDS plot', - lambda: plot_PCA(self, 'pca', self.ui.frame_pca, pltfile, '', ''), + self._create_or_reset('pca', 'multivariate ordination plot', + lambda: plot_ordination(self, 'pca', self.ui.frame_pca, pltfile, '', ''), lambda: self.pca.reset(pltfile, '', '')) - stop_functime('nmds complete') + stop_functime('ordination complete') if params.FC3Dplt: self._create_or_reset('fc3d', '3D fold-change plot', diff --git a/code/plotting.py b/code/plotting.py index b1b2e3f..046de0c 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -9,6 +9,7 @@ import pickle from csvcache import cached_read_csv, invalidate as invalidate_csv_cache +import ordination import matplotlib #matplotlib.style.use('ggplot') @@ -35,9 +36,6 @@ import scipy.cluster.hierarchy as shc from sklearn.preprocessing import normalize -from sklearn import manifold -from sklearn.metrics import pairwise_distances -from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from matplotlib.patches import Ellipse from filter import listfilter @@ -833,152 +831,233 @@ def plot(self, parent, file, filtereddfs, groupsets): left=0.1, right=0.95, bottom=0.35, top=0.9, hspace=0.2, wspace=0.2) parent.canvas[self.currplt].draw() -class plot_PCA(ui_plot): - #plots NMDS data - # should include opion to allow user specified pca colors - # need to fix selection of samples on PCA plot - # should add PCA vs NMDS option +_ORDINATION_SWITCHER_STYLE = """ +QWidget { + background-color: rgba(70,70,70,25); +} +QComboBox { + background-color: rgb(50,50,50); + color: rgb(200,200,200); + border: 1px solid 
rgb(70,70,70); + border-radius: 2px; + padding: 2px; +} +QLabel { + color: rgb(200,200,200); + background: transparent; +} +""" + + +class plot_ordination(ui_plot): + """Multivariate ordination plot: PCA, NMDS, or PLS-DA, with a + scores-vs-loadings view toggle. + + A combo-box switcher bar (built once in ``__init__``, inserted above the + canvas the same way ``SearchTreePanel``'s filter bar is substituted into + a Designer placeholder -- see searchtree.py) lets the user pick the + ordination method and the scores/loadings view; both redraw onto the + same axes via ``self.plot(...)`` rather than rebuilding the canvas. + + The actual math lives in the Qt-free ``ordination.py`` (PCA/NMDS/PLS-DA, + technical-replicate collapsing, top-N loadings selection); this class is + just the Qt plumbing and rendering on top of it. + """ + + METHODS = ('NMDS', 'PCA', 'PLS-DA') + VIEWS = ('Scores', 'Loadings') + def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): ui_plot.__init__(self, parent, currplt, frame) self.parent = parent self.currplt = currplt + # Defaults match the plot's previous (NMDS-only, scores-only) + # behaviour exactly, so existing sessions see no change until they + # explicitly switch the new controls. 
+ self.method = 'NMDS' + self.view = 'Scores' + self.loadings_df = None + self._build_switcher_bar(parent, currplt) self.plot(parent, file, filtereddfs, groupsets) + def _build_switcher_bar(self, parent, currplt): + bar = QtWidgets.QWidget() + bar.setStyleSheet(_ORDINATION_SWITCHER_STYLE) + layout = QtWidgets.QHBoxLayout(bar) + layout.setContentsMargins(4, 2, 4, 2) + + layout.addWidget(QtWidgets.QLabel('Method:')) + method_combo = QtWidgets.QComboBox() + method_combo.addItems(self.METHODS) + method_combo.setCurrentText(self.method) + method_combo.currentTextChanged.connect(self._on_method_changed) + layout.addWidget(method_combo) + + layout.addWidget(QtWidgets.QLabel('View:')) + view_combo = QtWidgets.QComboBox() + view_combo.addItems(self.VIEWS) + view_combo.setCurrentText(self.view) + view_combo.currentTextChanged.connect(self._on_view_changed) + layout.addWidget(view_combo) + layout.addStretch() + + self.method_combo = method_combo + self.view_combo = view_combo + parent.pltlayout[currplt].insertWidget(0, bar) + + def _on_method_changed(self, method): + self.method = method + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + + def _on_view_changed(self, view): + self.view = view + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def plot(self, parent, file, filtereddfs, groupsets): - """Plot principal component analysis (PCA) or NMDS data. - + """(Re)draw the ordination plot for the current method/view. + Args: - parent (QWidget): Parent widget. - currplt (int): Current plot number. - frame (QFrame): Frame to hold the plot. - file (str): Path to the file containing the PCA data. - filtereddfs (list): List of filtered data. - groupsets (list): List of group sets. - - Attributes: - highlightcol (tuple): Tuple containing RGBA values used for highlighting. - event (int): Identifier for the pick event used to select points on the plot. 
- - Methods: - plot(self, parent, file, filtereddfs, groupsets): Plot the PCA data. - plot_point_cov(self, points, nstd=2, ax=None, **kwargs): Generate an ellipse for the confidence interval. - lighten_color(self, color, amount=0.5): Lighten a given color by a given amount. - plot_cov_ellipse(self, cov, pos, nstd=2, ax=None, **kwargs): Generate an optimized ellipse for the confidence interval. + parent (QWidget): Parent widget (MainWindow). + file (str): Path to the ``_filtered.csv`` peak table. + filtereddfs, groupsets: unused here (kept for the shared + ``_create_or_reset``/``reset`` call signature every plot + class follows). """ parent = self.parent - parent.collapsereps = False#parent.dialog.ui.checkBox_collapsereps.isChecked() - - if parent.collapsereps: - # Average techreps if replicate collapse is selected - msdata = pd.read_csv(file, sep=',', header=[0, 1, 2], index_col=[0, 1, 2]) - try: - msdata = msdata.stack([0, 1, 2], future_stack=True) - except TypeError: - msdata = msdata.stack([0, 1, 2]) - msdata = msdata.groupby(level=[0, 1, 2, 3, 4]).mean().unstack(level=[-1, -2]) - test2 = msdata.columns.to_list() - msdata = msdata.reset_index() - header = [('','','Compound'), ('','','m/z'), ('','','Retention time (min)')] - for elem in test2: - header.append((elem[1], '', elem[0])) - msdata.columns = pd.MultiIndex.from_tuples(header) - msdata.to_csv('averagepca.csv', header=True, index=False) - - msdata_header = pd.read_csv('averagepca.csv', sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() - pcadf = pd.read_csv('averagepca.csv', sep=',', header=[2], index_col=[0]).drop(['m/z', 'Retention time (min)'], axis=1).transpose().astype(float).reset_index().rename(columns={'index': 'File'}) - else: - msdata_header = cached_read_csv(parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() - pcadf = pd.read_csv(file, sep=',', header=[2], 
index_col=[0]).drop(['m/z', 'Retention time (min)'], axis=1).transpose().astype(float).reset_index().rename(columns={'index': 'File'}) + self._last_file = file + self._last_filtereddfs = filtereddfs + self._last_groupsets = groupsets + + collapse_replicates = parent.dialog.ui.checkBox_collapsereps.isChecked() + raw_header = cached_read_csv( + parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), + sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() + x, biolgroup = ordination.load_ordination_matrix(file, raw_header.copy(), collapse_replicates) + + n_components = max(2, min(len(x) - 1, 10)) - components = len(msdata_header.index) - if components > 10: - components = 10 - msdata_header.columns = ['Biolgroup', 'Sample', 'Injection'] - msdata_header = msdata_header.set_index('Injection') colors = ['red', 'blue', 'black', 'grey', 'purple', 'orange', 'green', 'yellow', 'lime', 'plum', 'teal', 'olivedrab', 'sienna', 'maroon', 'navy', 'lightcoral', 'darkgoldenrod', 'seagreen', 'lightseagreen', 'aqua', 'lightsteelblue', 'slateblue', 'blueviolet', 'plum', 'burlywood', 'salmon', 'aquamarine', 'magenta', 'tan'] colorpos, biolgroupmap = 0, {} - for elem in msdata_header['Biolgroup']: + for elem in biolgroup: if elem not in biolgroupmap and elem != parent.analysis_paramsgui.blnkgrp: ###### delete blank clause OR CHANGE TO THE BLNKFILTER OPTION biolgroupmap[elem] = colors[colorpos] colorpos += 1 - - features = pcadf.columns.values[1:] - x = pcadf[features].values - y = pcadf[['File']].values - x -= x.mean() - similarities = pairwise_distances(x, metric='braycurtis') - - mds = manifold.MDS(n_components=components, max_iter=3000, eps=1e-9, random_state=1, - dissimilarity="precomputed", n_jobs=1) - pos = mds.fit(similarities).embedding_ - - nmds = manifold.MDS(n_components=components, metric=False, max_iter=3000, eps=1e-12, - dissimilarity="precomputed", random_state=1, n_jobs=1, - n_init=1) - npos = 
nmds.fit_transform(similarities, init=pos) - stress_value = nmds.stress_ - print("NMDS stress: " +str(stress_value)) - - pca = PCA(n_components=components) - nmdspc = pca.fit_transform(npos) - expvar = pca.explained_variance_ratio_ - pcadftest = pd.DataFrame(data=nmdspc) - - ncomplist = list(range(components)) - nmdspc = pd.DataFrame(data=nmdspc, columns=ncomplist) - nmdspc['File'] = pcadf['File'] - nmdspc['Biolgroup'] = '' - for i, elem in enumerate(nmdspc.iloc[:, components]): - nmdspc.iloc[i, components + 1] = msdata_header.loc[elem, 'Biolgroup'] - principalDf = nmdspc.set_index('File') - + + if self.method == 'PCA': + scores, loadings, expvar = ordination.run_pca(x, n_components) + axis_labels = [f'PC{i + 1} ({100 * expvar[i]:.1f}%)' for i in range(2)] + elif self.method == 'PLS-DA': + scores, loadings, expvar = ordination.run_plsda(x, biolgroup, n_components) + axis_labels = [f'PLS{i + 1} ({100 * expvar[i]:.1f}%)' for i in range(2)] + else: + scores, expvar, stress = ordination.run_nmds(x, n_components) + loadings = ordination.nmds_loading_proxy(x, scores) + print("NMDS stress: " + str(stress)) + # Labeled distinctly from PCA/PLS-DA's: this is the variance of + # the embedded 2D NMDS coordinates, not of the original feature + # space (see ordination.run_nmds's docstring). 
+ axis_labels = [f'NMDS{i + 1} ({100 * expvar[i]:.1f}% of embedding variance)' for i in range(2)] + + self.loadings_df = loadings + principalDf = scores.copy() + principalDf['Biolgroup'] = biolgroup + + if self.view == 'Loadings': + self._plot_loadings(parent, loadings, axis_labels) + else: + self._plot_scores(parent, principalDf, biolgroupmap, axis_labels) + + parent.fig[self.currplt].subplots_adjust(left=.1, right=.9, bottom=0.1, top=0.9, hspace=0.2, wspace=0.2) + parent.canvas[self.currplt].draw() + + def _plot_scores(self, parent, principalDf, biolgroupmap, axis_labels): for elem in biolgroupmap: scatterframe = principalDf[principalDf['Biolgroup'] == elem] - points = scatterframe.iloc[:,[0,1]].to_numpy() + points = scatterframe.iloc[:, [0, 1]].to_numpy() if np.shape(points)[0] > 2: self.plot_point_cov(points, nstd=2, ax=parent.ax[self.currplt], alpha=0.5, color=self.lighten_color(biolgroupmap[elem], 0.3)) - parent.ax[self.currplt].scatter(scatterframe.iloc[:,0], scatterframe.iloc[:,1], color=biolgroupmap[elem], marker='o', s=30, label=str(elem), picker=True) - + parent.ax[self.currplt].scatter(scatterframe.iloc[:, 0], scatterframe.iloc[:, 1], color=biolgroupmap[elem], marker='o', s=30, label=str(elem), picker=True) + parent.highlight[self.currplt], = parent.ax[self.currplt].plot([], [], 'o', markersize=12, color='yellow') - parent.ax[self.currplt].set_xlabel('NMDS1', **self.fcsfont) # (' + str(round(100*expvar[0], 2)) + '%)' - parent.ax[self.currplt].set_ylabel('NMDS2', **self.fcsfont) #(' + str(round(100*expvar[1], 2)) + '%)' - - #following two lines put a hard limit on the axis tick distance - #parent.ax[self.currplt].xaxis.set_major_locator(ticker.MultipleLocator(0.1)) - #parent.ax[self.currplt].yaxis.set_major_locator(ticker.MultipleLocator(0.1)) - + parent.ax[self.currplt].set_xlabel(axis_labels[0], **self.fcsfont) + parent.ax[self.currplt].set_ylabel(axis_labels[1], **self.fcsfont) + self.highlightcol = (0, 0, 0, 0) parent.pickedsample = pd.DataFrame(0, 
index=['empty'], columns=['empty']) - - def picksample(event): # fix this + + def picksample(event): if _is_duplicate_pick(parent, event): return ind = event.ind - coord = event.artist.get_offsets()[ind,:] - newsample = principalDf.loc[principalDf.iloc[:,0] == coord[0,0], :].loc[principalDf.iloc[:,1] == coord[0,1], :].reset_index() + coord = event.artist.get_offsets()[ind, :] + newsample = principalDf.loc[principalDf.iloc[:, 0] == coord[0, 0], :].loc[principalDf.iloc[:, 1] == coord[0, 1], :].reset_index() if newsample.empty: return - if newsample.iloc[0,0] == parent.pickedsample.iloc[0,0] and self.highlightcol != (0, 0, 0, 0): + if newsample.iloc[0, 0] == parent.pickedsample.iloc[0, 0] and self.highlightcol != (0, 0, 0, 0): self.highlightcol = (0, 0, 0, 0) else: self.highlightcol = 'yellow' - + parent.pickedsample = newsample - parent.ui.lbl_injname.setText('Injection/Sample: ' + str(parent.pickedsample.iloc[0,0])) - parent.highlight[self.currplt].set_data(coord[0,0],coord[0,1]) + parent.ui.lbl_injname.setText('Injection/Sample: ' + str(parent.pickedsample.iloc[0, 0])) + parent.highlight[self.currplt].set_data(coord[0, 0], coord[0, 1]) parent.highlight[self.currplt].set_color(self.highlightcol) parent.canvas[self.currplt].draw_idle() - + self.event = parent.canvas[self.currplt].figure.canvas.mpl_connect('pick_event', picksample) - parent.fig[self.currplt].subplots_adjust(left=.1, right=.9, bottom=0.1, top=0.9, hspace=0.2, wspace=0.2) - #x0,x1 = parent.ax[self.currplt].get_xlim() - #0,y1 = parent.ax[self.currplt].get_ylim() - #parent.ax[self.currplt].set_aspect(abs(x1-x0)/abs(y1-y0)) - #parent.ax[self.currplt].set_aspect('equal') parent.ax[self.currplt].legend() - parent.canvas[self.currplt].draw() - + + def _plot_loadings(self, parent, loadings, axis_labels): + """Loadings (biplot-style) view: origin-anchored arrows for the + top-N features by vector magnitude, plus -- regardless of + magnitude -- whichever feature is currently highlighted elsewhere + in the app 
(``parent.pickedfeature``), so a feature too small to + make the default cut is still visible on demand. + """ + always_include = [parent.pickedfeature] if getattr(parent, 'pickedfeature', '') else [] + subset = ordination.top_loadings(loadings, n=25, always_include=always_include) + + for feature, row in subset.iterrows(): + xcoord, ycoord = row.iloc[0], row.iloc[1] + parent.ax[self.currplt].annotate( + '', xy=(xcoord, ycoord), xytext=(0, 0), + arrowprops=dict(arrowstyle='->', color='steelblue', lw=1)) + parent.ax[self.currplt].annotate( + str(feature), xy=(xcoord, ycoord), fontsize=8, color='black') + + # Pre-created empty artist for the highlighted-loading marker, + # following the same convention as the scores view's + # parent.highlight[currplt] -- updated on demand by + # MainWindow._refresh_highlight() via self.highlight_loading(), + # even when the highlighted feature isn't in the default top-25. + self.loadings_highlight, = parent.ax[self.currplt].plot([], [], 'o', markersize=12, color='yellow', zorder=5) + self.highlight_loading(getattr(parent, 'pickedfeature', ''), getattr(parent, 'highlightcol', (0, 0, 0, 0))) + + parent.ax[self.currplt].axhline(0, color='grey', lw=0.5) + parent.ax[self.currplt].axvline(0, color='grey', lw=0.5) + parent.ax[self.currplt].set_xlabel(axis_labels[0], **self.fcsfont) + parent.ax[self.currplt].set_ylabel(axis_labels[1], **self.fcsfont) + + def highlight_loading(self, feature, colour): + """Update the loadings-view highlight marker for ``feature`` (a + no-op outside the loadings view or before it's been drawn once). + + Called from ``MainWindow._refresh_highlight()`` -- the same + pre-create-empty-artist/update-via-set_data convention every other + plot's highlight already follows, just driven by this plot's own + last-computed loadings instead of ``iondict``. 
+ """ + if self.view != 'Loadings' or self.loadings_df is None or not hasattr(self, 'loadings_highlight'): + return + if not feature or feature not in self.loadings_df.index: + self.loadings_highlight.set_data([], []) + else: + row = self.loadings_df.loc[feature] + self.loadings_highlight.set_data([row.iloc[0]], [row.iloc[1]]) + self.loadings_highlight.set_color(colour) + self.parent.canvas[self.currplt].draw_idle() + def plot_point_cov(self, points, nstd=2, ax=None, **kwargs): """Generate an ellipse for the confidence interval. diff --git a/devnotes.md b/devnotes.md index b99c9dd..981b1fb 100644 --- a/devnotes.md +++ b/devnotes.md @@ -107,8 +107,10 @@ that way. Required deps (gate startup): `epam.indigo`→`indigo`, `UpSetPlot`→ (shared save/restore schema for simple `analysis_parameters` checkbox fields), `biogroups.py` (`getgroups()`'s metadata-join/group-derivation core), `dbsearch.py` (`fulldbsearch()`'s NPAtlas ppm-window matching - core). Each corresponding `MainWindow` method is now a thin wrapper: - call the module function, then apply the result to widgets/`self`. + core), `ordination.py` (PCA/NMDS/PLS-DA + technical-replicate collapsing + + top-N loadings selection for the multivariate plot tab). Each + corresponding `MainWindow` method is now a thin wrapper: call the module + function, then apply the result to widgets/`self`. - **Runtime widget substitution into a Designer placeholder** is an established pattern here, not a one-off — `plotting.py` does it for every matplotlib canvas (inserted into a Designer-created `QFrame`), and @@ -159,7 +161,7 @@ python -m pytest code/tests -q ``` Covers `filter`, `stats`, `importdependencies`, `translators`, `groupsets`, -`searchtree`. Add tests here for any new Qt-free logic. +`searchtree`, `ordination`. Add tests here for any new Qt-free logic. 
`conftest.py` sets `QT_QPA_PLATFORM=offscreen` and provides a session-scoped `qapp` fixture: PyQt5 widgets/models/signals *can* be exercised headlessly via @@ -188,6 +190,67 @@ on load. UI uses `QListWidget` (generated, off-limits), so this is not a true `QAbstractListModel`/`QListView` setup — the "view" side is the existing hand-written widget-sync code in `ui_functions.py`, kept thin. +## Multivariate ordination plot (`plotting.plot_ordination`, `ordination.py`) + +What used to be called "PCA" (`plot_PCA`, `checkBox_pca`/`btn_pca`'s old +tooltip) actually only ran NMDS, with a PCA rotation applied to the NMDS +coordinates purely to orient the axes — not a second ordination of the +original features. `plot_ordination` now genuinely supports PCA, NMDS, and +PLS-DA, switchable via a combo-box bar inserted above the plot canvas (same +runtime widget-substitution pattern as `searchtree.py`'s filter bar — see +above), plus a Scores/Loadings view toggle. The math lives in the Qt-free +`ordination.py` (unit-tested in `tests/test_ordination.py`); `plotting.py` +only handles the combo boxes, axes, and pick events. + +- **Save-file compatibility preserved on purpose**: `analysis_params.PCA` + and `checkBox_pca`'s objectName are unchanged (still pickled into `.mpct` + saves) — only the visible checkbox text/tooltip changed (set at runtime in + `MainWindow.__init__`, same mechanism as `label_credits.setText`). Only + the hand-written class name (`plot_PCA` → `plot_ordination`) changed, + since that's never pickled. +- **"Collapse Technical Replicates" used to be dead** (`plotting.py` had + `parent.collapsereps = False#parent.dialog.ui.checkBox_collapsereps.isChecked()` + — hardcoded off, the real read commented out). Now wired for real via + `ordination.load_ordination_matrix(..., collapse_replicates=...)`. 
The + collapse logic itself (average technical replicates/Injections, keep + biological replicates/Samples distinct) was ported verbatim from the + original rather than rewritten — its header-relabeling-via-CSV-round-trip + is easy to get subtly wrong by inspection, so it's verified empirically + instead (`test_ordination.py`'s synthetic-replicate-structure test, cross- + checked against real example data with a scratch script during + development). +- **Loadings view and high-dimensional data**: thousands of features can't + all be drawn legibly, so only the top-25 by loading-vector magnitude are + shown by default (`ordination.top_loadings()`). Whichever feature is + currently highlighted elsewhere in the app (`MainWindow.pickedfeature`) is + always included regardless of magnitude — `plot_ordination.highlight_loading()`, + called from `MainWindow._refresh_highlight()`, follows the same + pre-create-an-empty-artist/update-via-`set_data()` convention every other + plot's highlight marker already uses. This is a *feature* highlight + (Loadings view), a different concept from the Scores view's existing + *sample* highlight (`parent.pickedsample`, set by clicking a sample point) + — the two views show different kinds of points and were never the same + selection concept. +- **NMDS has no linear feature loadings** (it's a rank-based embedding, not + a linear projection) — its Loadings view uses `ordination.nmds_loading_proxy()`, + per-feature correlation with each NMDS axis (the standard ecology "vector + fitting"/`envfit` approach), not real loadings. Its percent-explained axis + label is also captioned distinctly from PCA/PLS-DA's ("% of embedding + variance" vs. real original-feature-space variance), since the two + quantities aren't comparable. 
+- **PLS-DA's explained-variance gotcha**: `sklearn.cross_decomposition.PLSRegression` + defaults to `scale=True` (standardizes X internally), which silently + produced explained-variance ratios off by ~6 orders of magnitude when + compared against unscaled total variance — caught only by running against + real data, not by inspection. Fixed with `scale=False`, matching PCA's + plain-centered (not standardized) treatment. +- **OPLS-DA intentionally not implemented**: no native scikit-learn support; + the alternatives (the unmaintained `pyopls` package, or a from-scratch + orthogonal-signal-correction implementation) are both riskier than + shipping PCA/NMDS/PLS-DA without a reference dataset to validate against. + Logged here as the next ordination method to add if ever revisited, not + started. + ## Conventions - Don't edit the generated UI files (above). Put behaviour in `main.py` / From c7b6a0107ee0dcf07de0e4cb95010a98af7cdbcf Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 14:48:41 -0400 Subject: [PATCH 07/16] Fix ordination feedback: scaling, axis limits, NMDS %explained, bar styling All four issues caught only by checking against real data / the live GUI, not by inspection: - PCA/PLS-DA now autoscale features (mean-center + unit-variance) before fitting. Raw mass-spec intensities span a huge range across features (confirmed: feature std devs from ~1.8 to ~10,000 on real example data) -- without scaling, a handful of high-abundance features dominated both explained-variance and loadings, which is why loadings were showing up "in the thousands" while most were tiny, and why %explained looked unusually high. NMDS is deliberately left unscaled (Bray-Curtis dissimilarity is conventionally computed on raw/relative abundances). - NMDS axis labels no longer show percent-explained at all -- it's a rank-based embedding, not a linear decomposition, so it doesn't canonically have that quantity the way PCA/PLS-DA do. 
Shows stress (the conventional NMDS fit-quality metric) as the plot title instead. - Loadings-view axis limits are now set explicitly from the actually- plotted data: ax.annotate()'s arrows don't reliably drive matplotlib's autoscale the way ax.scatter()/ax.plot() do (confirmed empirically -- plotted points could fall outside the auto-picked view), which is what required manually rescaling each axis before. Also fixed top_loadings() being called against the full (up to 10-component) loadings instead of just the 2 displayed ones, which could let an irrelevant-to-this-view feature crowd out a genuinely prominent one. - Switcher bar: capped to a fixed max height so it doesn't eat canvas space, and restyled for page_pca's light background (white combo boxes, dark text) instead of searchtree.py's dark-theme styling, which was the wrong context here. Co-Authored-By: Claude Sonnet 4.6 --- code/ordination.py | 49 ++++++++++++++++++++++++++++++++++------------ code/plotting.py | 48 ++++++++++++++++++++++++++++++++++----------- devnotes.md | 46 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 110 insertions(+), 33 deletions(-) diff --git a/code/ordination.py b/code/ordination.py index 1457168..c941453 100644 --- a/code/ordination.py +++ b/code/ordination.py @@ -100,8 +100,29 @@ def load_ordination_matrix(file, raw_msdata_header, collapse_replicates): return x, biolgroup +def autoscale(x): + """Mean-center and scale each feature to unit variance ("UV-scaling" / + "autoscaling" in chemometrics terminology -- the standard pre-treatment + for PCA/PLS-DA on mass-spec intensity data). + + Without this, raw intensities (which can span several orders of + magnitude between features -- confirmed on real example data: feature + standard deviations ranged from ~1.8 to ~10,000, a ~5800x spread) let a + handful of high-abundance features dominate both the apparent + explained-variance and the loadings, regardless of which features + actually separate the biological groups. 
NMDS is deliberately NOT put + through this -- its Bray-Curtis dissimilarity is conventionally computed + on raw (or relative) abundances, not standardized ones. + """ + std = x.std(axis=0) + std = std.replace(0, 1) # a zero-variance feature would divide by zero; leave it at 0 instead + return (x - x.mean(axis=0)) / std + + def run_pca(x, n_components): - """Plain PCA on the samples x features matrix. + """PCA on the samples x features matrix, after autoscaling (see + ``autoscale()``) so the result isn't dominated by whichever features + happen to have the largest raw intensity. Returns: (scores, loadings, explained_variance_ratio): ``scores`` is a @@ -110,8 +131,9 @@ def run_pca(x, n_components): contribution to each component; ``explained_variance_ratio`` is an ndarray of length ``n_components``. """ + x_scaled = autoscale(x) pca = PCA(n_components=n_components) - scores = pca.fit_transform(x.values - x.values.mean(axis=0)) + scores = pca.fit_transform(x_scaled.values) columns = [f'PC{i + 1}' for i in range(n_components)] scores = pd.DataFrame(scores, index=x.index, columns=columns) loadings = pd.DataFrame(pca.components_.T, index=x.columns, columns=columns) @@ -186,25 +208,26 @@ def run_plsda(x, y, n_components): ``run_pca``'s. scikit-learn doesn't expose an explained-variance ratio for PLS directly, so it's computed manually here as each component's X-score variance divided by the total variance of - (centered) ``x`` -- the standard approach for reporting %-explained - on a PLS biplot. + (autoscaled) ``x`` -- the standard approach for reporting + %-explained on a PLS biplot. 
""" + x_scaled = autoscale(x) y_dummies = pd.get_dummies(y) - # scale=False: PLSRegression's default scale=True standardizes X (and Y) - # to unit variance per column internally, so x_scores_ would otherwise - # live on a different scale than x_centered below -- comparing the two - # directly (as the explained-variance-ratio calc does) silently produced - # a near-zero, meaningless ratio until this was caught by running this - # against real data (see the scratch script / devnotes.md). + # scale=False: we already autoscaled x ourselves (above), consistent + # with run_pca -- letting PLSRegression's default scale=True scale it + # AGAIN (and scale the 0/1 dummy y columns, which doesn't make sense for + # group-membership indicators) would both double-scale x and distort y. + # (scale=False also previously fixed a bug where x_scores_ lived on a + # different scale than the unscaled total-variance denominator below, + # before autoscaling was added -- see devnotes.md.) pls = PLSRegression(n_components=n_components, scale=False) - pls.fit(x.values, y_dummies.values) + pls.fit(x_scaled.values, y_dummies.values) x_scores = pls.x_scores_ columns = [f'PLS{i + 1}' for i in range(n_components)] scores = pd.DataFrame(x_scores, index=x.index, columns=columns) loadings = pd.DataFrame(pls.x_loadings_, index=x.columns, columns=columns) - x_centered = x.values - x.values.mean(axis=0) - total_variance = np.sum(x_centered ** 2) + total_variance = np.sum(x_scaled.values ** 2) component_variance = np.sum(x_scores ** 2, axis=0) explained_variance_ratio = component_variance / total_variance return scores, loadings, explained_variance_ratio diff --git a/code/plotting.py b/code/plotting.py index 046de0c..9647767 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -831,19 +831,24 @@ def plot(self, parent, file, filtereddfs, groupsets): left=0.1, right=0.95, bottom=0.35, top=0.9, hspace=0.2, wspace=0.2) parent.canvas[self.currplt].draw() +_ORDINATION_SWITCHER_BAR_HEIGHT = 32 + +# Unlike 
searchtree.py's filter bar (a dark-themed tab), page_pca's +# background is light (rgba(225,225,225,255), see ui_main.py) -- dark text +# on a light/white combo box, not searchtree's light-on-dark scheme. _ORDINATION_SWITCHER_STYLE = """ QWidget { - background-color: rgba(70,70,70,25); + background: transparent; } QComboBox { - background-color: rgb(50,50,50); - color: rgb(200,200,200); - border: 1px solid rgb(70,70,70); + background-color: rgb(255,255,255); + color: rgb(30,30,30); + border: 1px solid rgb(150,150,150); border-radius: 2px; padding: 2px; } QLabel { - color: rgb(200,200,200); + color: rgb(30,30,30); background: transparent; } """ @@ -883,6 +888,7 @@ def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): def _build_switcher_bar(self, parent, currplt): bar = QtWidgets.QWidget() bar.setStyleSheet(_ORDINATION_SWITCHER_STYLE) + bar.setMaximumHeight(_ORDINATION_SWITCHER_BAR_HEIGHT) layout = QtWidgets.QHBoxLayout(bar) layout.setContentsMargins(4, 2, 4, 2) @@ -943,6 +949,7 @@ class follows). biolgroupmap[elem] = colors[colorpos] colorpos += 1 + plot_title = None if self.method == 'PCA': scores, loadings, expvar = ordination.run_pca(x, n_components) axis_labels = [f'PC{i + 1} ({100 * expvar[i]:.1f}%)' for i in range(2)] @@ -952,11 +959,12 @@ class follows). else: scores, expvar, stress = ordination.run_nmds(x, n_components) loadings = ordination.nmds_loading_proxy(x, scores) - print("NMDS stress: " + str(stress)) - # Labeled distinctly from PCA/PLS-DA's: this is the variance of - # the embedded 2D NMDS coordinates, not of the original feature - # space (see ordination.run_nmds's docstring). - axis_labels = [f'NMDS{i + 1} ({100 * expvar[i]:.1f}% of embedding variance)' for i in range(2)] + # NMDS doesn't canonically report percent-variance-explained the + # way PCA/PLS-DA do (it's a rank-based embedding, not a linear + # decomposition of the feature space) -- stress is the + # conventional thing to report for NMDS instead. 
+ axis_labels = ['NMDS1', 'NMDS2'] + plot_title = f'Stress: {stress:.4f}' self.loadings_df = loadings principalDf = scores.copy() @@ -967,6 +975,9 @@ class follows). else: self._plot_scores(parent, principalDf, biolgroupmap, axis_labels) + if plot_title: + parent.ax[self.currplt].set_title(plot_title, fontsize=10) + parent.fig[self.currplt].subplots_adjust(left=.1, right=.9, bottom=0.1, top=0.9, hspace=0.2, wspace=0.2) parent.canvas[self.currplt].draw() @@ -1016,7 +1027,22 @@ def _plot_loadings(self, parent, loadings, axis_labels): make the default cut is still visible on demand. """ always_include = [parent.pickedfeature] if getattr(parent, 'pickedfeature', '') else [] - subset = ordination.top_loadings(loadings, n=25, always_include=always_include) + # Rank by magnitude within the 2 displayed components only, not the + # full (up to 10-component) loadings -- a feature could rank in the + # overall top-25 purely from a large contribution to some other, + # unplotted component while barely showing up here, displacing a + # feature that's actually prominent in this 2D view. + subset = ordination.top_loadings(loadings.iloc[:, :2], n=25, always_include=always_include) + + # ax.annotate()'s arrows don't reliably drive matplotlib's autoscale + # the way ax.scatter()/ax.plot() do (confirmed empirically: points + # can end up outside the auto-picked view limits), so the axis + # range is set explicitly here instead of relying on autoscale. + # Symmetric around 0 since loadings/correlations are naturally + # origin-centered (a biplot convention). 
+ limit = subset.iloc[:, :2].abs().values.max() * 1.2 if len(subset) else 1.0 + parent.ax[self.currplt].set_xlim(-limit, limit) + parent.ax[self.currplt].set_ylim(-limit, limit) for feature, row in subset.iterrows(): xcoord, ycoord = row.iloc[0], row.iloc[1] diff --git a/devnotes.md b/devnotes.md index 981b1fb..9ece8b8 100644 --- a/devnotes.md +++ b/devnotes.md @@ -234,16 +234,44 @@ only handles the combo boxes, axes, and pick events. - **NMDS has no linear feature loadings** (it's a rank-based embedding, not a linear projection) — its Loadings view uses `ordination.nmds_loading_proxy()`, per-feature correlation with each NMDS axis (the standard ecology "vector - fitting"/`envfit` approach), not real loadings. Its percent-explained axis - label is also captioned distinctly from PCA/PLS-DA's ("% of embedding - variance" vs. real original-feature-space variance), since the two - quantities aren't comparable. + fitting"/`envfit` approach), not real loadings. **NMDS's axis labels don't + show percent-explained at all** (just "NMDS1"/"NMDS2") — NMDS is a + rank-based embedding, not a linear decomposition of the feature space, so + it doesn't canonically have a %-variance-explained quantity the way + PCA/PLS-DA do; the plot title shows stress instead, the conventional NMDS + fit-quality metric. +- **PCA/PLS-DA are autoscaled** (`ordination.autoscale()`: mean-center + + scale each feature to unit variance) before fitting — without this, raw + intensities (confirmed on real example data: feature standard deviations + ranged from ~1.8 to ~10,000, a ~5800x spread) let a handful of + high-abundance features dominate both the apparent explained-variance and + the loadings, drowning out features that actually separate the + biological groups but happen to have lower raw intensity. 
This is the + standard chemometrics pretreatment for PCA/PLS-DA on this kind of data; + NMDS is deliberately NOT autoscaled (its Bray-Curtis dissimilarity is + conventionally computed on raw/relative abundances). - **PLS-DA's explained-variance gotcha**: `sklearn.cross_decomposition.PLSRegression` - defaults to `scale=True` (standardizes X internally), which silently - produced explained-variance ratios off by ~6 orders of magnitude when - compared against unscaled total variance — caught only by running against - real data, not by inspection. Fixed with `scale=False`, matching PCA's - plain-centered (not standardized) treatment. + defaults to `scale=True` (standardizes X internally), which -- before + autoscaling was added -- silently produced explained-variance ratios off + by ~6 orders of magnitude when compared against unscaled total variance, + caught only by running against real data, not by inspection. Fixed with + `scale=False` and autoscaling `x` ourselves first instead (matching PCA's + treatment, and avoiding PLSRegression's `scale=True` also incorrectly + scaling the 0/1 group-membership dummy columns). +- **Loadings-view rendering gotchas** (both only surfaced by checking + against real data, not by inspection): (1) `ax.annotate()`-drawn arrows + don't reliably participate in matplotlib's autoscale the way + `ax.scatter()`/`ax.plot()` do — confirmed empirically that plotted arrow + tips could fall outside the axis' auto-picked view limits — so + `plot_ordination._plot_loadings()` now sets `ax.set_xlim`/`set_ylim` + explicitly from the actually-plotted subset's coordinates, symmetric + around 0. 
(2) `ordination.top_loadings()` must be called with only the 2 + displayed components (`loadings.iloc[:, :2]`), not the full (up to + 10-component) loadings — ranking by overall magnitude across all + components could let a feature into the "top 25" purely from a large + contribution to some unplotted component while barely showing up in the + actual PC1-vs-PC2 view, displacing a feature that's genuinely prominent + there. - **OPLS-DA intentionally not implemented**: no native scikit-learn support; the alternatives (the unmaintained `pyopls` package, or a from-scratch orthogonal-signal-correction implementation) are both riskier than From ede19688c7d36fc81f288f206d279af78d350a9b Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 17:41:48 -0400 Subject: [PATCH 08/16] Add dendrogram purity coloring: technical/biological replicate QC view New clusterpurity.py colors dendrogram branches green wherever a whole group's leaves merge together before meeting any other group, plus a Technical/Biological Replicates switcher on the dendrogram tab (mirrors plot_ordination's method/view bar) and a plot-title purity summary (n_pure/n_total). Applies to both the regular and bootstrap (PvClust) dendrogram paths. Co-Authored-By: Claude Sonnet 4.6 --- code/clusterpurity.py | 76 +++++++++++++++++++ code/plotting.py | 126 +++++++++++++++++++++++++------ code/pvclust.py | 16 ++-- code/tests/test_clusterpurity.py | 78 +++++++++++++++++++ devnotes.md | 67 +++++++++++++++- 5 files changed, 334 insertions(+), 29 deletions(-) create mode 100644 code/clusterpurity.py create mode 100644 code/tests/test_clusterpurity.py diff --git a/code/clusterpurity.py b/code/clusterpurity.py new file mode 100644 index 0000000..afcebbd --- /dev/null +++ b/code/clusterpurity.py @@ -0,0 +1,76 @@ +""" +MPACT +Copyright 2022, Robert M. Samples, Sara P. Puckett, and Marcy J. 
Balunas + +Qt-free dendrogram "purity" coloring: a branch is colored green if every +leaf beneath it shares the same group label -- i.e. that group is a +monophyletic clade, it clustered together before merging with anything +else -- and left at the default color otherwise. Used by the dendrogram tab +to make it visually obvious whether technical replicates of one Sample +cluster tightly together, and separately whether biological replicates of +one Biolgroup are well separated from other groups. + +This module is Qt-free and unit-tested (see ``tests/test_clusterpurity.py``). +""" + + +def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='black'): + """Build a ``link_color_func`` for ``scipy.cluster.hierarchy.dendrogram``. + + Args: + Z: linkage matrix (``scipy.cluster.hierarchy.linkage`` or + fastcluster's drop-in) built on observations in the same order + as ``leaf_labels``. + leaf_labels: sequence, length == number of observations clustered by + ``Z``, giving each leaf's group label (e.g. its Sample or + Biolgroup), in the same order as the data passed to ``linkage``. + + Returns: + callable: ``link_color_func(k)`` as expected by ``dendrogram``'s + ``link_color_func`` argument -- for link index ``k`` + (``len(leaf_labels) <= k``), returns ``true_color`` if every leaf + descending from that link shares one label, else ``false_color``. + """ + n_leaves = len(leaf_labels) + leaf_label_sets = {i: {leaf_labels[i]} for i in range(n_leaves)} + colors = {} + for i, row in enumerate(Z): + a, b = int(row[0]), int(row[1]) + node_id = n_leaves + i + merged = leaf_label_sets[a] | leaf_label_sets[b] + leaf_label_sets[node_id] = merged + colors[node_id] = true_color if len(merged) == 1 else false_color + return lambda k: colors.get(k, false_color) + + +def purity_summary(Z, leaf_labels): + """Count how many distinct group labels form one pure clade each. 
+ + A label is "pure" only if *every* leaf carrying that label ends up + together in one clade before that clade merges with any other leaf -- + i.e. the group is exactly monophyletic in the dendrogram. (A node whose + descendants are a uniform-but-incomplete subset of a label -- e.g. 2 of + a Sample's 3 technical replicates -- does NOT count: the third + replicate clustering elsewhere means that Sample isn't really pure.) + + Returns: + (n_pure, n_total): number of distinct labels that are fully pure + clades, out of the total number of distinct labels in + ``leaf_labels``. + """ + n_leaves = len(leaf_labels) + leaf_index_sets = {i: frozenset((i,)) for i in range(n_leaves)} + target_sets = { + label: frozenset(i for i in range(n_leaves) if leaf_labels[i] == label) + for label in set(leaf_labels) + } + pure_labels = set() + for i, row in enumerate(Z): + a, b = int(row[0]), int(row[1]) + node_id = n_leaves + i + merged = leaf_index_sets[a] | leaf_index_sets[b] + leaf_index_sets[node_id] = merged + for label, target in target_sets.items(): + if merged == target: + pure_labels.add(label) + return len(pure_labels), len(target_sets) diff --git a/code/plotting.py b/code/plotting.py index 9647767..ff0e76f 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -10,6 +10,7 @@ from csvcache import cached_read_csv, invalidate as invalidate_csv_cache import ordination +import clusterpurity import matplotlib #matplotlib.style.use('ggplot') @@ -798,45 +799,126 @@ def plot(self, parent, file, filtereddfs, groupsets): class plot_dendrogram(ui_plot): """ - Dendrogram generation. - - A CSV file of data for clustering is read and code performs hierarchical clustering using the ward method - and the euclidean distance metric. The resulting dendrogram is plotted on the given frame using the parent object's - figure and canvas. The dendrogram can be either regular or bootstrapped depending on the value of the - parent.analysis_paramsgui.bootstrap parameter. 
The resulting plot is saved to the parent object's figure and - displayed on the canvas. + Dendrogram generation, with a combo-box switcher (same pattern as + plot_ordination's method/view bar) between two purity-colored views: + + - "Technical Replicates": every injection is its own leaf, colored + green wherever an entire Sample's injections cluster together before + merging with anything else -- a quick visual QC for whether technical + replicates are tight. + - "Biological Replicates": injections are first averaged per Sample + (same collapsing logic as the ordination tab's "Collapse Technical + Replicates" checkbox, via ordination.load_ordination_matrix), then + leaves are colored green wherever an entire Biolgroup's samples + cluster together -- a quick visual QC for whether biological groups + are separable at all, independent of technical noise. + + Either view can be regular or bootstrapped (PvClust), depending on + parent.analysis_paramsgui.bootstrap, same as before this rework. The + purity-coloring math lives in the Qt-free clusterpurity.py. """ + VIEWS = ('Technical Replicates', 'Biological Replicates') + def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): ui_plot.__init__(self, parent, currplt, frame) self.parent = parent self.currplt = currplt + # Default matches the plot's previous (injection-level) behaviour + # exactly, so existing sessions see no change until they explicitly + # switch to the biological-replicate view. 
+ self.view = 'Technical Replicates' + self._build_switcher_bar(parent, currplt) self.plot(parent, file, filtereddfs, groupsets) + def _build_switcher_bar(self, parent, currplt): + bar = QtWidgets.QWidget() + bar.setStyleSheet(_SWITCHER_BAR_STYLE) + bar.setMaximumHeight(_SWITCHER_BAR_HEIGHT) + layout = QtWidgets.QHBoxLayout(bar) + layout.setContentsMargins(4, 2, 4, 2) + + layout.addWidget(QtWidgets.QLabel('View:')) + view_combo = QtWidgets.QComboBox() + view_combo.addItems(self.VIEWS) + view_combo.setCurrentText(self.view) + view_combo.currentTextChanged.connect(self._on_view_changed) + layout.addWidget(view_combo) + layout.addStretch() + + self.view_combo = view_combo + parent.pltlayout[currplt].insertWidget(0, bar) + + def _on_view_changed(self, view): + self.view = view + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def plot(self, parent, file, filtereddfs, groupsets): - heirarch = pd.read_csv(file, sep=',', header=[2], index_col=[0]).drop(['m/z', 'Retention time (min)'], axis=1) - data_scaled = normalize(heirarch, axis=0) # normalize features - data_scaled = pd.DataFrame(data_scaled, columns=heirarch.columns, index=heirarch.index) - textlabels = [elem for elem in data_scaled.columns.tolist()] - + self._last_file = file + self._last_filtereddfs = filtereddfs + self._last_groupsets = groupsets + + # PvClust (bootstrap path) expects "variables x objects" -- it + # bootstraps over the rows (features) and transposes internally + # before clustering the columns (the objects/leaves). shc.linkage + # (regular path) expects the opposite, "objects x variables" -- + # build both orientations from the same scaled data below. + if self.view == 'Biological Replicates': + # Collapse technical replicates first -- leaves are Samples, + # purity is judged against Biolgroup. 
+ raw_header = cached_read_csv( + parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), + sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() + x, biolgroup = ordination.load_ordination_matrix(file, raw_header.copy(), collapse_replicates=True) + data_scaled = normalize(x.values, axis=1) # normalize each sample's profile + data_scaled = pd.DataFrame(data_scaled, columns=x.columns, index=x.index) # samples x features + textlabels = data_scaled.index.tolist() + leaf_labels = [biolgroup[sample] for sample in textlabels] + data_for_linkage = data_scaled + data_for_pvclust = data_scaled.transpose() + purity_noun = 'biological groups separable' + else: + heirarch = pd.read_csv(file, sep=',', header=[2], index_col=[0]).drop(['m/z', 'Retention time (min)'], axis=1) + data_scaled = normalize(heirarch, axis=0) # normalize features + data_scaled = pd.DataFrame(data_scaled, columns=heirarch.columns, index=heirarch.index) # features x injections + textlabels = data_scaled.columns.tolist() + raw_header = cached_read_csv( + parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), + sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() + raw_header.columns = ['Biolgroup', 'Sample', 'Injection'] + sample_of_injection = raw_header.set_index('Injection')['Sample'].to_dict() + leaf_labels = [sample_of_injection[name] for name in textlabels] + data_for_linkage = data_scaled.transpose() + data_for_pvclust = data_scaled + purity_noun = "samples' replicates clustered together" + if parent.analysis_paramsgui.bootstrap: # bootstrap dendrogram - pv = PvClust(data_scaled, method="ward", metric="euclidean", nboot=1000, parallel=True) - dend = pv.plot(parent.ax[self.currplt], labels=textlabels) + pv = PvClust(data_for_pvclust, method="ward", metric="euclidean", nboot=1000, parallel=True) + link_color_func = clusterpurity.purity_link_color_func(pv.linkage_matrix, leaf_labels) + dend 
= pv.plot(parent.ax[self.currplt], labels=textlabels, link_color_func=link_color_func) + Z = pv.linkage_matrix else: # regular dendrogram - dend = shc.dendrogram(shc.linkage(data_scaled.transpose(), method='ward'), ax=parent.ax[self.currplt], leaf_rotation=90, color_threshold=0, above_threshold_color='black', labels=textlabels) # default leaf label size 16 + Z = shc.linkage(data_for_linkage, method='ward') + link_color_func = clusterpurity.purity_link_color_func(Z, leaf_labels) + dend = shc.dendrogram(Z, ax=parent.ax[self.currplt], leaf_rotation=90, above_threshold_color='black', link_color_func=link_color_func, labels=textlabels) # default leaf label size 16 + + n_pure, n_total = clusterpurity.purity_summary(Z, leaf_labels) + parent.ax[self.currplt].set_title(f'{n_pure}/{n_total} {purity_noun}', fontsize=10) parent.fig[self.currplt].subplots_adjust( left=0.1, right=0.95, bottom=0.35, top=0.9, hspace=0.2, wspace=0.2) parent.canvas[self.currplt].draw() -_ORDINATION_SWITCHER_BAR_HEIGHT = 32 +# Shared by plot_dendrogram's and plot_ordination's combo-box switcher bars +# -- page_dend and page_pca both have the same light background +# (rgba(225,225,225,255), see ui_main.py), unlike searchtree.py's filter bar +# (a dark-themed tab) -- dark text on a light/white combo box, not +# searchtree's light-on-dark scheme. +_SWITCHER_BAR_HEIGHT = 32 -# Unlike searchtree.py's filter bar (a dark-themed tab), page_pca's -# background is light (rgba(225,225,225,255), see ui_main.py) -- dark text -# on a light/white combo box, not searchtree's light-on-dark scheme. 
-_ORDINATION_SWITCHER_STYLE = """ +_SWITCHER_BAR_STYLE = """ QWidget { background: transparent; } @@ -887,8 +969,8 @@ def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): def _build_switcher_bar(self, parent, currplt): bar = QtWidgets.QWidget() - bar.setStyleSheet(_ORDINATION_SWITCHER_STYLE) - bar.setMaximumHeight(_ORDINATION_SWITCHER_BAR_HEIGHT) + bar.setStyleSheet(_SWITCHER_BAR_STYLE) + bar.setMaximumHeight(_SWITCHER_BAR_HEIGHT) layout = QtWidgets.QHBoxLayout(bar) layout.setContentsMargins(4, 2, 4, 2) diff --git a/code/pvclust.py b/code/pvclust.py index 353d301..1efc2dd 100644 --- a/code/pvclust.py +++ b/code/pvclust.py @@ -187,10 +187,11 @@ def _result(self): columns=['AU', 'BP', 'SE.AU', 'SE.BP', 'pchi', 'v', 'c']) return result - def plot(self, ax, labels=None): #added axis input + def plot(self, ax, labels=None, link_color_func=None): #added axis input """Plot dendrogram with AU BP values for each node""" plot_dendrogram(self.linkage_matrix, - np.array(self.result[['AU', 'BP']]), ax, labels) + np.array(self.result[['AU', 'BP']]), ax, labels, + link_color_func) def seplot(self, pvalue='AU', annotate=False): """p-values vs Standard error plot""" @@ -271,7 +272,7 @@ def find_clusters(self): return clusters -def plot_dendrogram(linkage_matrix, pvalues, axis, labels=None): #added axis input +def plot_dendrogram(linkage_matrix, pvalues, axis, labels=None, link_color_func=None): #added axis input """ Plot dendrogram with AU BP values for each node""" d = dendrogram(linkage_matrix, no_plot=True) xcoord = d["icoord"] @@ -280,13 +281,18 @@ def plot_dendrogram(linkage_matrix, pvalues, axis, labels=None): #added axis inp x = {i: (j[1]+j[2])/2 for i, j in enumerate(xcoord)} y = {i: j[1] for i, j in enumerate(ycoord)} pos = node_positions(y, x) - + plt.figure(figsize=(12, 8)) plt.tight_layout() set_link_color_palette(['c', 'g']) + # link_color_func, when given, takes priority over color_threshold/ + # above_threshold_color (scipy's own precedence rule) 
-- that's how the + # dendrogram tab's purity coloring (clusterpurity.py) reaches the + # bootstrap dendrogram too. d = dendrogram(linkage_matrix, labels=labels, above_threshold_color='black', - color_threshold=0, leaf_rotation=90, ax = axis) + color_threshold=0, leaf_rotation=90, ax=axis, + link_color_func=link_color_func) maxval = max(y.values()) ax = axis for node, (x, y) in pos.items(): #modifications added to scale y axis label shifts diff --git a/code/tests/test_clusterpurity.py b/code/tests/test_clusterpurity.py new file mode 100644 index 0000000..6cb74cb --- /dev/null +++ b/code/tests/test_clusterpurity.py @@ -0,0 +1,78 @@ +"""Unit tests for dendrogram purity coloring (``clusterpurity.py``).""" + +import numpy as np +from scipy.cluster.hierarchy import linkage + +from clusterpurity import purity_link_color_func, purity_summary + + +def _two_clean_groups(): + """6 points: 3 tightly clustered near (0, 0) labeled 'A', 3 tightly + clustered near (10, 10) labeled 'B' -- each group should merge with + itself long before the two groups merge with each other. + """ + data = np.array([ + [0.0, 0.0], [0.1, 0.0], [0.0, 0.1], + [10.0, 10.0], [10.1, 10.0], [10.0, 10.1], + ]) + labels = ['A', 'A', 'A', 'B', 'B', 'B'] + return data, labels + + +def test_purity_summary_both_groups_pure(): + data, labels = _two_clean_groups() + Z = linkage(data, method='ward') + n_pure, n_total = purity_summary(Z, labels) + assert (n_pure, n_total) == (2, 2) + + +def test_purity_link_color_func_roots_to_false_color_leaves_to_true_color(): + data, labels = _two_clean_groups() + Z = linkage(data, method='ward') + n_leaves = len(labels) + color_func = purity_link_color_func(Z, labels) + + # The final merge (root) joins group A's clade with group B's clade -- + # that link must NOT be the "pure" color. 
+ root_node_id = n_leaves + len(Z) - 1 + assert color_func(root_node_id) == 'black' + + # Every internal node strictly below the root is a within-group merge + # for this dataset (each group's 3 points cluster before the cross-group + # merge) -- those links must be the "pure" color. + for i in range(len(Z) - 1): + node_id = n_leaves + i + assert color_func(node_id) == 'green' + + +def test_purity_link_color_func_custom_colors(): + data, labels = _two_clean_groups() + Z = linkage(data, method='ward') + color_func = purity_link_color_func(Z, labels, true_color='cyan', false_color='grey') + n_leaves = len(labels) + root_node_id = n_leaves + len(Z) - 1 + assert color_func(root_node_id) == 'grey' + assert color_func(n_leaves) == 'cyan' + + +def test_purity_summary_one_mismatched_leaf_breaks_purity_for_its_group(): + # Same as the clean two-group case, but one of group A's points is + # actually closest to group B -- A should no longer be reported pure + # (its leaves don't all merge together before meeting a 'B' leaf), + # while B (unaffected) should still be pure. + data = np.array([ + [0.0, 0.0], [0.1, 0.0], [9.9, 9.9], # last "A" point sits with B + [10.0, 10.0], [10.1, 10.0], [10.0, 10.1], + ]) + labels = ['A', 'A', 'A', 'B', 'B', 'B'] + Z = linkage(data, method='ward') + n_pure, n_total = purity_summary(Z, labels) + assert n_pure == 1 + assert n_total == 2 + + +def test_purity_link_color_func_unknown_link_id_falls_back_to_false_color(): + data, labels = _two_clean_groups() + Z = linkage(data, method='ward') + color_func = purity_link_color_func(Z, labels) + assert color_func(99999) == 'black' diff --git a/devnotes.md b/devnotes.md index 9ece8b8..912e641 100644 --- a/devnotes.md +++ b/devnotes.md @@ -108,7 +108,8 @@ that way. 
Required deps (gate startup): `epam.indigo`→`indigo`, `UpSetPlot`→ fields), `biogroups.py` (`getgroups()`'s metadata-join/group-derivation core), `dbsearch.py` (`fulldbsearch()`'s NPAtlas ppm-window matching core), `ordination.py` (PCA/NMDS/PLS-DA + technical-replicate collapsing - + top-N loadings selection for the multivariate plot tab). Each + + top-N loadings selection for the multivariate plot tab), `clusterpurity.py` + (dendrogram branch-purity coloring for the dendrogram tab). Each corresponding `MainWindow` method is now a thin wrapper: call the module function, then apply the result to widgets/`self`. - **Runtime widget substitution into a Designer placeholder** is an @@ -161,7 +162,8 @@ python -m pytest code/tests -q ``` Covers `filter`, `stats`, `importdependencies`, `translators`, `groupsets`, -`searchtree`, `ordination`. Add tests here for any new Qt-free logic. +`searchtree`, `ordination`, `clusterpurity`. Add tests here for any new +Qt-free logic. `conftest.py` sets `QT_QPA_PLATFORM=offscreen` and provides a session-scoped `qapp` fixture: PyQt5 widgets/models/signals *can* be exercised headlessly via @@ -279,6 +281,67 @@ only handles the combo boxes, axes, and pick events. Logged here as the next ordination method to add if ever revisited, not started. +## Dendrogram purity coloring (`plotting.plot_dendrogram`, `clusterpurity.py`) + +The dendrogram tab has a combo-box switcher (same runtime-widget-substitution +pattern as `plot_ordination`'s method/view bar) between two views, both +purity-colored to make a QC judgment visible at a glance rather than read off +leaf labels one at a time: + +- **Technical Replicates** (default — matches the tab's previous, only, + behaviour): every Injection is its own leaf. A branch is colored green + wherever *all* of one Sample's injections merge together before merging + with anything else (i.e. 
that Sample is a monophyletic clade) — a + tight green clump means that sample's replicates agree; black means they + don't. +- **Biological Replicates**: technical replicates are averaged first (same + `ordination.load_ordination_matrix(..., collapse_replicates=True)` used by + the multivariate tab's checkbox), so leaves are Samples, and purity is + judged against Biolgroup instead — green means a whole biological group's + samples cluster together before meeting another group, i.e. the groups are + separable; black means they're not. + +The plot title reports `n_pure/n_total` (e.g. "7/9 samples' replicates +clustered together", "3/3 biological groups separable") using +`clusterpurity.purity_summary()` — the same Qt-free linkage-traversal logic +that drives the coloring, unit-tested in `tests/test_clusterpurity.py`. + +- **Purity is a strict, whole-group check, not "any uniform subset"**: a + label only counts as pure if *every* leaf carrying it ends up in one clade + before that clade touches a different label — 2 of a Sample's 3 replicates + merging together does NOT make that Sample pure if the third replicate + clusters elsewhere. An earlier version of `purity_summary()` got this + wrong (counted a label pure as soon as ANY uniform-label merge occurred, + which is right for `purity_link_color_func`'s per-branch coloring but wrong + for the whole-group summary count) — caught by a test built from a + deliberately "rogue" planted point, not by inspection. +- **PvClust orientation gotcha**: `pvclust.PvClust` expects "variables x + objects" (rows = the things bootstrapped over, i.e. features; it + transposes internally before clustering the columns) — the *opposite* + orientation from `scipy.cluster.hierarchy.linkage`, which expects "objects + x variables". 
`plot_dendrogram.plot()` builds both orientations + (`data_for_linkage`, `data_for_pvclust`) from the same scaled data rather + than reusing one array, since which one is "transposed" flips between the + Technical (features x injections is the natural read) and Biological + (samples x features, from `load_ordination_matrix`) views. +- **`link_color_func` threaded through the bootstrap path too**: + `PvClust.plot()` and the free function `pvclust.plot_dendrogram()` both + gained a `link_color_func=None` passthrough parameter into their inner + `scipy.cluster.hierarchy.dendrogram()` call, so the AU/BP bootstrap + dendrogram gets the same purity coloring as the regular one (`scipy`'s own + precedence rule: `link_color_func`, when given, overrides + `color_threshold`/`above_threshold_color`). +- **Multiprocessing safety note (re-learned, not new)**: validating the + bootstrap path's wiring during development used `parallel=False` and a + tiny `nboot`, never `PvClust(..., parallel=True)` in an ad hoc script — + `multiprocessing.Pool()` re-executes a script's top-level code in each + spawned child on Windows unless the call site is guarded by + `if __name__ == '__main__':` (the same class of hazard as the frozen-exe + fork-bomb bug elsewhere in this file, just without needing + `freeze_support()` specifically). The real app is fine — `main.py` already + guards its entry point — but throwaway test scripts need the same + discipline. + ## Conventions - Don't edit the generated UI files (above). 
Put behaviour in `main.py` / From 573cbfa383921c8f4c53ed9a21da67881b013a31 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 18:04:08 -0400 Subject: [PATCH 09/16] Dendrogram: polyphyletic branches in red, add a no-coloring option Purity coloring now uses red (not black) for branches that mix more than one group, and a new "Color: None" mode reproduces the tab's pre-purity-coloring appearance exactly (plain black, no title) -- fixes a regression where dropping color_threshold=0 made "None" fall back to scipy's default multi-color palette instead of plain black. Co-Authored-By: Claude Sonnet 4.6 --- code/plotting.py | 80 ++++++++++++++++++++++++++++++++++-------------- devnotes.md | 54 +++++++++++++++++++------------- 2 files changed, 89 insertions(+), 45 deletions(-) diff --git a/code/plotting.py b/code/plotting.py index ff0e76f..faa3c92 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -799,35 +799,46 @@ def plot(self, parent, file, filtereddfs, groupsets): class plot_dendrogram(ui_plot): """ - Dendrogram generation, with a combo-box switcher (same pattern as - plot_ordination's method/view bar) between two purity-colored views: - - - "Technical Replicates": every injection is its own leaf, colored - green wherever an entire Sample's injections cluster together before - merging with anything else -- a quick visual QC for whether technical - replicates are tight. + Dendrogram generation, with combo-box switchers (same pattern as + plot_ordination's method/view bar) for which leaves to cluster and how + to color the branches: + + Views: + - "Technical Replicates": every injection is its own leaf -- branches + are judged for purity against Sample membership, a quick visual QC + for whether technical replicates are tight. 
- "Biological Replicates": injections are first averaged per Sample (same collapsing logic as the ordination tab's "Collapse Technical Replicates" checkbox, via ordination.load_ordination_matrix), then - leaves are colored green wherever an entire Biolgroup's samples - cluster together -- a quick visual QC for whether biological groups - are separable at all, independent of technical noise. - - Either view can be regular or bootstrapped (PvClust), depending on - parent.analysis_paramsgui.bootstrap, same as before this rework. The - purity-coloring math lives in the Qt-free clusterpurity.py. + leaves are Samples, judged for purity against Biolgroup -- a quick + visual QC for whether biological groups are separable at all, + independent of technical noise. + + Coloring: + - "Purity": green wherever a branch's leaves are entirely one group + (correctly clustered), red wherever a branch mixes more than one + group (polyphyletic). + - "None": plain black dendrogram, no purity coloring or title -- the + tab's original (pre-purity-coloring) appearance. + + Either view/coloring combination can be regular or bootstrapped + (PvClust), depending on parent.analysis_paramsgui.bootstrap, same as + before this rework. The purity-coloring math lives in the Qt-free + clusterpurity.py. """ VIEWS = ('Technical Replicates', 'Biological Replicates') + COLOR_MODES = ('Purity', 'None') def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): ui_plot.__init__(self, parent, currplt, frame) self.parent = parent self.currplt = currplt - # Default matches the plot's previous (injection-level) behaviour - # exactly, so existing sessions see no change until they explicitly - # switch to the biological-replicate view. + # Defaults match the plot's previous (injection-level, uncolored) + # behaviour exactly, so existing sessions see no change until they + # explicitly switch the new controls. 
self.view = 'Technical Replicates' + self.color_mode = 'Purity' self._build_switcher_bar(parent, currplt) self.plot(parent, file, filtereddfs, groupsets) @@ -844,15 +855,27 @@ def _build_switcher_bar(self, parent, currplt): view_combo.setCurrentText(self.view) view_combo.currentTextChanged.connect(self._on_view_changed) layout.addWidget(view_combo) + + layout.addWidget(QtWidgets.QLabel('Color:')) + color_combo = QtWidgets.QComboBox() + color_combo.addItems(self.COLOR_MODES) + color_combo.setCurrentText(self.color_mode) + color_combo.currentTextChanged.connect(self._on_color_mode_changed) + layout.addWidget(color_combo) layout.addStretch() self.view_combo = view_combo + self.color_combo = color_combo parent.pltlayout[currplt].insertWidget(0, bar) def _on_view_changed(self, view): self.view = view self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def _on_color_mode_changed(self, color_mode): + self.color_mode = color_mode + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def plot(self, parent, file, filtereddfs, groupsets): self._last_file = file self._last_filtereddfs = filtereddfs @@ -895,17 +918,28 @@ def plot(self, parent, file, filtereddfs, groupsets): if parent.analysis_paramsgui.bootstrap: # bootstrap dendrogram pv = PvClust(data_for_pvclust, method="ward", metric="euclidean", nboot=1000, parallel=True) - link_color_func = clusterpurity.purity_link_color_func(pv.linkage_matrix, leaf_labels) - dend = pv.plot(parent.ax[self.currplt], labels=textlabels, link_color_func=link_color_func) Z = pv.linkage_matrix else: # regular dendrogram Z = shc.linkage(data_for_linkage, method='ward') - link_color_func = clusterpurity.purity_link_color_func(Z, leaf_labels) - dend = shc.dendrogram(Z, ax=parent.ax[self.currplt], leaf_rotation=90, above_threshold_color='black', link_color_func=link_color_func, labels=textlabels) # default leaf label size 16 - n_pure, n_total = clusterpurity.purity_summary(Z, leaf_labels) - 
parent.ax[self.currplt].set_title(f'{n_pure}/{n_total} {purity_noun}', fontsize=10) + if self.color_mode == 'Purity': + # Green = monophyletic (correctly clustered); red = polyphyletic + # (mixes more than one group). + link_color_func = clusterpurity.purity_link_color_func(Z, leaf_labels, true_color='green', false_color='red') + else: + link_color_func = None # plain black dendrogram, scipy's own default + + if parent.analysis_paramsgui.bootstrap: + dend = pv.plot(parent.ax[self.currplt], labels=textlabels, link_color_func=link_color_func) + else: + dend = shc.dendrogram(Z, ax=parent.ax[self.currplt], leaf_rotation=90, color_threshold=0, above_threshold_color='black', link_color_func=link_color_func, labels=textlabels) # default leaf label size 16 + + if self.color_mode == 'Purity': + n_pure, n_total = clusterpurity.purity_summary(Z, leaf_labels) + parent.ax[self.currplt].set_title(f'{n_pure}/{n_total} {purity_noun}', fontsize=10) + # "None" coloring intentionally leaves no title -- this tab had none + # before purity coloring was added. parent.fig[self.currplt].subplots_adjust( left=0.1, right=0.95, bottom=0.35, top=0.9, hspace=0.2, wspace=0.2) diff --git a/devnotes.md b/devnotes.md index 912e641..69eac0a 100644 --- a/devnotes.md +++ b/devnotes.md @@ -283,28 +283,38 @@ only handles the combo boxes, axes, and pick events. ## Dendrogram purity coloring (`plotting.plot_dendrogram`, `clusterpurity.py`) -The dendrogram tab has a combo-box switcher (same runtime-widget-substitution -pattern as `plot_ordination`'s method/view bar) between two views, both -purity-colored to make a QC judgment visible at a glance rather than read off -leaf labels one at a time: - -- **Technical Replicates** (default — matches the tab's previous, only, - behaviour): every Injection is its own leaf. A branch is colored green - wherever *all* of one Sample's injections merge together before merging - with anything else (i.e. 
that Sample is a monophyletic clade) — a - tight green clump means that sample's replicates agree; black means they - don't. -- **Biological Replicates**: technical replicates are averaged first (same - `ordination.load_ordination_matrix(..., collapse_replicates=True)` used by - the multivariate tab's checkbox), so leaves are Samples, and purity is - judged against Biolgroup instead — green means a whole biological group's - samples cluster together before meeting another group, i.e. the groups are - separable; black means they're not. - -The plot title reports `n_pure/n_total` (e.g. "7/9 samples' replicates -clustered together", "3/3 biological groups separable") using -`clusterpurity.purity_summary()` — the same Qt-free linkage-traversal logic -that drives the coloring, unit-tested in `tests/test_clusterpurity.py`. +The dendrogram tab has two combo-box switchers (same runtime-widget- +substitution pattern as `plot_ordination`'s method/view bar): + +- **View** — which leaves to cluster: + - **Technical Replicates** (default — matches the tab's original + behaviour): every Injection is its own leaf, purity judged against + Sample membership — a tight monophyletic clump means that sample's + replicates agree. + - **Biological Replicates**: technical replicates are averaged first + (same `ordination.load_ordination_matrix(..., collapse_replicates=True)` + used by the multivariate tab's checkbox), so leaves are Samples, purity + judged against Biolgroup instead — a monophyletic clade means a whole + biological group's samples cluster together before meeting another + group, i.e. the groups are separable. +- **Color** — how to render purity: + - **Purity** (default): green wherever a branch's leaves are entirely one + group (correctly clustered), red wherever a branch mixes more than one + group (polyphyletic) — a QC judgment visible at a glance rather than + read off leaf labels one at a time. The plot title reports + `n_pure/n_total` (e.g. 
"7/9 samples' replicates clustered together", + "3/3 biological groups separable") via `clusterpurity.purity_summary()`. + - **None**: plain black, no title — deliberately reproduces the tab's + appearance from *before* purity coloring existed (there was no title at + all previously), for anyone who just wants the dendrogram shape without + the QC overlay. Implemented as `link_color_func=None` with + `color_threshold=0` still set (dropping `color_threshold=0` here was a + real regression caught while testing: without it, scipy falls back to + its own default 0.7-of-max-height threshold and a multi-color palette + instead of plain black). + +Both views' purity math is the same Qt-free linkage-traversal logic in +`clusterpurity.py`, unit-tested in `tests/test_clusterpurity.py`. - **Purity is a strict, whole-group check, not "any uniform subset"**: a label only counts as pure if *every* leaf carrying it ends up in one clade From bab713d05d38d87e5a40f5838ffdd66732db1052 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 18:16:19 -0400 Subject: [PATCH 10/16] Replace treemap/upset PNG round-trip with real canvas plots gen_treemap/gen_upsetplt used to savefig() a PNG to the repo root and load it into a QLabel via QPixmap -- no zoom/pan/save toolbar, and a flat raster rewritten on every run. plot_treemap/plot_upset now draw directly onto a persistent FigureCanvas, wired into _generate_plots() via the same _create_or_reset pattern every other plot uses, so they regenerate on both a fresh analysis and the Apply button (previously only on a fresh analysis). 
Co-Authored-By: Claude Sonnet 4.6 --- code/main.py | 23 ++--- code/plotting.py | 263 ++++++++++++++++++++++++++++------------------- devnotes.md | 49 +++++++++ 3 files changed, 217 insertions(+), 118 deletions(-) diff --git a/code/main.py b/code/main.py index fc91309..418e2bd 100644 --- a/code/main.py +++ b/code/main.py @@ -41,7 +41,7 @@ from biogroups import compute_biological_groups from dbsearch import search_npatlas from searchtree import SearchTreePanel -from plotting import plot_abund, show_spectrum, show_featureplt, plot_heatmap, plot_mzrt, plot_samplecorr, kendrick, plot_volcano, plot_fc3d, plot_dendrogram, plot_ordination, prev_cv, gen_upsetplt, gen_treemap +from plotting import plot_abund, show_spectrum, show_featureplt, plot_heatmap, plot_mzrt, plot_samplecorr, kendrick, plot_volcano, plot_fc3d, plot_dendrogram, plot_ordination, prev_cv, plot_upset, plot_treemap import getfragdb from indigo import Indigo @@ -1056,6 +1056,11 @@ def _generate_plots(self): dfs = self.filtereddfs grpsts = self.groupsets + self._create_or_reset('treemap', 'treemap', + lambda: plot_treemap(self, 'treemap', self.ui.frame_treemap, pltfile, '', ''), + lambda: self.treemap.reset(pltfile, '', '')) + stop_functime('treemap complete') + if params.CVfil: self._create_or_reset('prevcv', 'CV plot', lambda: prev_cv(self, 'cvplt', self.ui.frame_cvplt, 'none', 'none', 'none'), @@ -1119,6 +1124,11 @@ def _generate_plots(self): lambda: self.samplecorr.reset(iondictfile, dfs, grpsts)) stop_functime('samplecorr complete') + self._create_or_reset('upset', 'upset plot', + lambda: plot_upset(self, 'upset', self.ui.frame_upset, iondictfile, '', ''), + lambda: self.upset.reset(iondictfile, '', '')) + stop_functime('upsetplt complete') + def run_analysis(self): # Ignore re-clicks while an analysis is already running on the worker thread. 
if getattr(self, '_analysis_thread', None) is not None and self._analysis_thread.isRunning(): @@ -1171,12 +1181,6 @@ def _on_compute_finished(self): self.ui.btn_run.setEnabled(True) def _finish_analysis(self): - try: - gen_treemap(self) # move back to end - except Exception: - print("not generating tremap due to an error") - stop_functime('treemap complete') - # Used for point opacity based on abundance colouring iondict = cached_read_csv(self.analysis_paramsgui.outputdir / 'iondict.csv', sep=',', header=[0], index_col=None) self.analysis_paramsgui.maxval = iondict['logmax'].max() @@ -1224,11 +1228,6 @@ def _finish_analysis(self): self.fillfttree() self.dbsearchdone = True - try: - gen_upsetplt(self) - except Exception: - print("not generating upset plot due to an error") - stop_functime('upsetplt complete') self.ui.label_status.setText('Analysis Complete') stop_functime('analysis complete') print('') diff --git a/code/plotting.py b/code/plotting.py index faa3c92..06d1b7f 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -31,7 +31,7 @@ import platform from PyQt5 import QtCore, QtGui, QtWidgets from PyQt5.QtCore import (QCoreApplication, QPropertyAnimation, QDate, QDateTime, QMetaObject, QObject, QPoint, QRect, QSize, QTime, QUrl, Qt, QEvent) -from PyQt5.QtGui import (QBrush, QColor, QIcon, QPalette, QPainter, QPixmap) +from PyQt5.QtGui import (QBrush, QColor, QIcon, QPalette, QPainter) from PyQt5.QtWidgets import * from pathlib import Path @@ -1361,114 +1361,165 @@ def plot(self, parent, file, filtereddfs, groupsets): parent.canvas[currplt].draw() -def gen_upsetplt(parent): #need to do something to handle groups with names that are substrings of other group names +def _detach_placeholder_widget(frame, old_widget): + """Remove a Designer-placed placeholder widget (and the layout holding + it) from ``frame`` so a fresh layout can be installed in its place. 
+ + Most plot frames in this app start out empty in Designer, so + ``ui_plot.__init__`` can just call ``frame.setLayout(...)`` directly. + ``frame_treemap``/``frame_upset`` are the exception -- Designer gave + them a layout with a placeholder ``QLabel`` (the old static-image + target) already in it, and Qt refuses ``setLayout()`` on a frame that + already has one. Reparenting the old layout onto a throwaway widget + (the standard Qt trick for "delete this layout") detaches it from + ``frame`` without touching anything else -- same runtime + widget-substitution pattern as searchtree.py's filter-bar swap. + """ + old_layout = frame.layout() + if old_layout is not None: + old_layout.removeWidget(old_widget) + old_widget.setParent(None) + old_widget.deleteLater() + QtWidgets.QWidget().setLayout(old_layout) + + +class plot_treemap(ui_plot): + """Treemap of how many features each enabled filter removed. + + Drawn directly onto a persistent FigureCanvas (same runtime-widget- + substitution + ui_plot pattern as every other plot tab) instead of the + previous ``squarify.plot()`` -> ``savefig('treemap.png')`` -> ``QPixmap`` + round trip into a Designer-placed ``QLabel`` (``label_treemap``) -- that + PNG round trip meant no zoom/pan/save-at-resolution toolbar, and a flat + raster file rewritten at the repo root on every run. """ - Generate an upset plot to visualize sets of compounds in groups. This function also handles groups with names that are substrings of other group names. - Parameters: - parent (object): The parent object that the generated plot will be a child of. 
+ def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): + _detach_placeholder_widget(frame, parent.ui.label_treemap) + ui_plot.__init__(self, parent, currplt, frame) + self.parent = parent + self.currplt = currplt + self.plot(parent, file, filtereddfs, groupsets) - Returns: - None - """ - iondict = cached_read_csv(parent.analysis_paramsgui.outputdir / 'iondict.csv', sep=',', header=0, index_col=None) - - # Apply filters if required - if parent.analysis_paramsgui.relfil: - iondict = iondict[iondict['pass_relfil']] - if parent.analysis_paramsgui.decon: - iondict = iondict[iondict['pass_insource']] - if parent.analysis_paramsgui.blnkfltr: - iondict = iondict[iondict['pass_blnkfil']] - if parent.analysis_paramsgui.CVfil: - iondict = iondict[iondict['pass_cvfil']] - - # Prepare data for upset plot - iongroups = iondict['groups'].tolist() - freq = {} - biolgroups = [] - for item in iongroups: - if item not in freq: - freq[item] = 0 - freq[item] += 1 - - header = cached_read_csv(parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), sep=',', header=None, index_col=[0, 1, 2]).iloc[0, :] - for elem in header: - if elem not in biolgroups: - biolgroups.append(elem) - - sets = [' ' + elem for elem in list(freq.keys())] - size = list(freq.values()) - setdf = pd.DataFrame({'groups': sets}) - for elem in biolgroups: #have to do this if one group is a substring of another, add space - setdf[elem] = setdf['groups'].str.contains(' ' + elem) - setdf['size'] = size - setdf = setdf.iloc[:, 1:] - setdf = setdf.set_index(biolgroups)['size'] - - # Plot and display the upset plot - with plt.rc_context({"font.size": 8}): - upsetplt = upsetplot.plot(setdf, show_counts='%d', show_percentages=True, sort_categories_by=None) - - figup = upsetplt['matrix'].figure - figup.set_size_inches(5, 4) - figup.set_facecolor((0, 0, 0, 0)) - upsetplt['intersections'].set_facecolor((1, 1, 1, .25)) - figup.savefig('test_upsetplt.png', dpi=150, 
bbox_inches='tight') - pixmap = QPixmap('test_upsetplt.png') - parent.ui.label_upset.setPixmap(pixmap) - -def gen_treemap(parent): - #generate treemap for visualization of filtering levels - #needed to refilter data and see how df row lengths change to avoid issues with one feature being in multiple filter lists - """ - The gen_treemap function generates a treemap for visualizing filtering levels. The function reads a CSV file containing the - filtered data and another CSV file containing information about the ions. The function then filters the ion data based on - various filter options and calculates the number of ions filtered by each filter. Finally, the function generates a treemap - to display the number of ions that passed each filter and saves it as a PNG file. The treemap is then displayed in a QLabel in the GUI. + def plot(self, parent, file, filtereddfs, groupsets): + msdata_filtered = cached_read_csv( + parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), + sep=',', header=[0, 1, 2], index_col=[0, 1, 2]) + iondict = cached_read_csv(parent.analysis_paramsgui.outputdir / 'iondict.csv', sep=',', header=[0], index_col=[0]) - Args: - - parent: the parent widget where the treemap will be displayed + fltrcnt, color = {}, [] + current = len(iondict.index) + + if parent.analysis_paramsgui.relfil: + filteredsetsize = len(iondict[iondict['pass_relfil']].index) + fltrcnt['Mispicked'] = current - filteredsetsize + current = filteredsetsize + color.append('#0000ff') + + if parent.analysis_paramsgui.blnkfltr: + filteredsetsize = len(iondict[iondict['pass_blnkfil']].index) + fltrcnt['Blank'] = current - filteredsetsize + current = filteredsetsize + color.append('#00aaaa') + + if parent.analysis_paramsgui.CVfil: + fltrcnt['Nonreproducible'] = len(parent.ionfilters['cv'].ions) + current = current - fltrcnt['Nonreproducible'] + color.append('#ff0000') + + if parent.analysis_paramsgui.decon: + fltrcnt['Insource'] = 
len(parent.ionfilters['insource'].ions) + color.append('#00aa00') + + fltrcnt['High Quality'] = len(msdata_filtered.index) + color.append('#000000') + + sizes = list(fltrcnt.values()) + total_size = sum(sizes) + labels = [f"{label}\n{size}\n{round(100 * size / total_size, 1)}%" for label, size in fltrcnt.items()] + + ax = parent.ax[self.currplt] + ax.clear() + squarify.plot(sizes=sizes, label=labels, color=color, alpha=0.3, text_kwargs={'fontsize': 10}, ax=ax) + ax.axis('off') + parent.canvas[self.currplt].draw() + + +class plot_upset: + """Upset plot of how filtered features distribute across groupsets. + Drawn directly onto a persistent Figure -- ``upsetplot.plot()`` accepts + an existing ``fig=`` instead of always creating its own -- rather than + the previous ``upsetplot.plot()`` -> ``savefig('test_upsetplt.png')`` -> + ``QPixmap`` round trip into the Designer-placed ``label_upset``. + + Doesn't subclass ``ui_plot``, the same as ``plot_heatmap`` and for the + same reason: ``upsetplot`` lays out several axes (matrix, totals, + intersections, shading) on the figure itself via its own gridspec -- + there's no single "ax" to hand callers the way every scatter/line plot + here has, so ``ui_plot.__init__``'s single pre-made ``ax`` would just be + an unused, overlapping blank axes. 
""" - plt.clf() - msdata_filtered = cached_read_csv(parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), sep=',', header=[0, 1, 2], index_col=[0, 1, 2]) - fltrcnt, color = {}, [] - iondict = cached_read_csv(parent.analysis_paramsgui.outputdir / 'iondict.csv', sep=',', header=[0], index_col=[0]) - total = len(iondict.index) - current = total - - if parent.analysis_paramsgui.relfil: - filteredsetsize = len(iondict[iondict['pass_relfil']].index) - fltrcnt['Mispicked'] = current - filteredsetsize - current = filteredsetsize - color.append('#0000ff') - - if parent.analysis_paramsgui.blnkfltr: - filteredsetsize = len(iondict[iondict['pass_blnkfil']].index) - fltrcnt['Blank'] = current - filteredsetsize - current = filteredsetsize - color.append('#00aaaa') - - if parent.analysis_paramsgui.CVfil: - fltrcnt['Nonreproducible'] = len(parent.ionfilters['cv'].ions) - current = current - fltrcnt['Nonreproducible'] - color.append('#ff0000') - - if parent.analysis_paramsgui.decon: - fltrcnt['Insource'] = len(parent.ionfilters['insource'].ions) - color.append('#00aa00') - - fltrcnt['High Quality'] = len(msdata_filtered.index) - color.append('#000000') - - sizes = list(fltrcnt.values()) - total_size = sum(fltrcnt.values()) - labels = [f"{label}\n{size}\n{round(100*size/total_size,1)}%" for label, size in fltrcnt.items()] - - squarify.plot(sizes=sizes, label=labels, color=color, alpha=0.3, text_kwargs={'fontsize': 10}) - plt.axis('off') - plt.savefig('treemap.png', dpi=150, bbox_inches='tight') - pixmap = QPixmap('treemap.png') - parent.ui.label_treemap.setPixmap(pixmap) \ No newline at end of file + + def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): + _detach_placeholder_widget(frame, parent.ui.label_upset) + self.parent = parent + self.currplt = currplt + + parent.fig[currplt] = Figure() + parent.pltlayout[currplt] = QtWidgets.QVBoxLayout() + parent.canvas[currplt] = FigureCanvas(parent.fig[currplt]) + 
parent.pltlayout[currplt].addWidget(parent.canvas[currplt]) + parent.toolbar[currplt] = NavigationToolbar(parent.canvas[currplt], parent) + parent.toolbar[currplt].setStyleSheet("background-color:rgba(225,225,225,0);") + parent.pltlayout[currplt].addWidget(parent.toolbar[currplt]) + frame.setLayout(parent.pltlayout[currplt]) + + self.plotbackground = (.89, .89, .89, 0) + self.plot(parent, file, filtereddfs, groupsets) + + def plot(self, parent, file, filtereddfs, groupsets): + # upsetplot.plot() lays out fresh axes via its own gridspec on + # whatever figure it's given -- clear the figure first so repeated + # calls (regenerate/Apply) don't pile up axes on top of each other. + parent.fig[self.currplt].clf() + + iondict = cached_read_csv(parent.analysis_paramsgui.outputdir / 'iondict.csv', sep=',', header=0, index_col=None) + if parent.analysis_paramsgui.relfil: + iondict = iondict[iondict['pass_relfil']] + if parent.analysis_paramsgui.decon: + iondict = iondict[iondict['pass_insource']] + if parent.analysis_paramsgui.blnkfltr: + iondict = iondict[iondict['pass_blnkfil']] + if parent.analysis_paramsgui.CVfil: + iondict = iondict[iondict['pass_cvfil']] + + iongroups = iondict['groups'].tolist() + freq = {} + for item in iongroups: + freq[item] = freq.get(item, 0) + 1 + + header = cached_read_csv( + parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), + sep=',', header=None, index_col=[0, 1, 2]).iloc[0, :] + biolgroups = [] + for elem in header: + if elem not in biolgroups: + biolgroups.append(elem) + + sets = [' ' + elem for elem in freq.keys()] + setdf = pd.DataFrame({'groups': sets}) + for elem in biolgroups: # space-prefix handles one group name being a substring of another + setdf[elem] = setdf['groups'].str.contains(' ' + elem) + setdf['size'] = list(freq.values()) + setdf = setdf.iloc[:, 1:].set_index(biolgroups)['size'] + + with plt.rc_context({"font.size": 8}): + upsetplt = upsetplot.plot(setdf, 
fig=parent.fig[self.currplt], show_counts='%d', show_percentages=True, sort_categories_by=None) + + parent.fig[self.currplt].set_facecolor(self.plotbackground) + upsetplt['intersections'].set_facecolor((1, 1, 1, .25)) + parent.canvas[self.currplt].draw() + + def reset(self, file, filtereddfs, groupsets): + self.plot(self.parent, file, filtereddfs, groupsets) \ No newline at end of file diff --git a/devnotes.md b/devnotes.md index 69eac0a..9b496b7 100644 --- a/devnotes.md +++ b/devnotes.md @@ -352,6 +352,55 @@ Both views' purity math is the same Qt-free linkage-traversal logic in guards its entry point — but throwaway test scripts need the same discipline. +## Treemap / upset plot canvases (`plotting.plot_treemap`, `plotting.plot_upset`) + +These two tabs used to be the only plots in the app that weren't real +matplotlib canvases: `gen_treemap`/`gen_upsetplt` (free functions, not +`ui_plot` subclasses) drew with `squarify`/`upsetplot`, `savefig()`'d a PNG +to the repo root (`treemap.png`/`test_upsetplt.png`), then loaded that PNG +into a `QPixmap` on the Designer-placed `label_treemap`/`label_upset`. That +meant no zoom/pan/save-at-resolution toolbar, a flat raster rewritten from +scratch on every run, and files left sitting at the repo root. + +Both are now `ui_plot`-style classes (`plot_treemap`/`plot_upset`) drawing +directly onto a persistent `FigureCanvas`, wired into `MainWindow._generate_plots()` +via `_create_or_reset()` exactly like every other plot — so they're created +once and `.reset()` afterward, regenerating on both a fresh analysis run and +the dialog's "Apply" button (`regenerateplts()`), the same as every other +plot. They previously were NOT regenerated by Apply at all (`gen_treemap`/ +`gen_upsetplt` were only ever called once, directly from `_finish_analysis`) +— a small behavior change, but one that brings them in line with how every +other plot already worked, not a new inconsistency. 
+ +- **`frame_treemap`/`frame_upset` needed a different substitution trick**: + unlike most plot frames (empty in Designer, so `ui_plot.__init__` can just + call `frame.setLayout(...)`), these two already have a Designer-built + layout holding the old placeholder `QLabel` — Qt refuses `setLayout()` on + a frame that already has one. `plotting._detach_placeholder_widget()` + removes the old label and reparents the old layout onto a throwaway + widget (the standard Qt "delete this layout" trick) before the normal + `ui_plot.__init__`/manual canvas setup runs — same runtime + widget-substitution pattern as `searchtree.py`'s filter-bar swap, just + with an extra detach step first. Verified headlessly (offscreen Qt) that + this doesn't raise and the frame ends up with exactly the new layout. +- **`plot_upset` doesn't subclass `ui_plot`**, same as `plot_heatmap` and for + the same reason: `upsetplot.plot()` lays out several axes (matrix, + totals, intersections, shading) via its own gridspec on whatever figure + it's given — there's no single "ax" to hand callers the way every + scatter/line plot here has. Unlike `plot_heatmap` (which has to transplant + axes from a brand-new seaborn figure onto the persistent one, since + `sns.clustermap()` doesn't accept an existing figure), `upsetplot.plot()` + takes a `fig=` kwarg directly — `reset()` just `fig.clf()`s and re-plots + onto the same figure, no axes-transplant needed. +- **Verified against real data, not just import-checked**: a throwaway + headless-Qt script built fake Designer-style frames (QFrame + layout + + placeholder QLabel, matching `frame_treemap`/`frame_upset`'s actual + structure), ran both classes against the real example dataset, and + asserted (1) the new canvas/toolbar actually replaced the old layout, (2) + axes count is stable across `.reset()` calls (not growing — would mean + old axes/figures were leaking), and (3) no PNG got written to disk by + either plot anymore. 
+ ## Conventions - Don't edit the generated UI files (above). Put behaviour in `main.py` / From 3be44dc3db4088966ac78e3ae527663de7305bfb Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 18:34:35 -0400 Subject: [PATCH 11/16] Dendrogram: bridge-only red coloring; move bootstrap/collapse checkboxes to per-plot bars - clusterpurity.purity_link_color_func now distinguishes pure (green), bridge (red -- the specific merge where a new group first meets an existing one), and neutral (black -- combining two already-impure clades, no new information). Previously every ancestor of a single mixing event also rendered red, painting most of the tree's upper structure red regardless of how localized the actual mixing was. - "Bootstrap Analysis" and "Collapse Technical Replicates" moved off the global plot-config dialog (where each only ever affected one plot) onto that plot's own switcher bar: plot_dendrogram gets a "Bootstrap" checkbox, plot_ordination gets a "Collapse Replicates" checkbox. The now-orphaned dialog widgets are hidden at runtime (not edited out of the generated ui_plotparam.py); 'bootstrap' is dropped from paramfields.CHECKBOX_FIELDS since it's no longer pickled, consistent with the dendrogram/ordination tabs' other per-session-only view state. - Delete code/treemap.png and code/test_upsetplt.png: dead tracked files left over from before the canvas-based rendering change -- nothing reads or writes them anymore. 
Co-Authored-By: Claude Sonnet 4.6 --- code/clusterpurity.py | 34 ++++++++++++++++++---- code/paramfields.py | 1 - code/plotting.py | 48 +++++++++++++++++++++++++------ code/test_upsetplt.png | Bin 41302 -> 0 bytes code/tests/test_clusterpurity.py | 43 +++++++++++++++++++++++---- code/treemap.png | Bin 29414 -> 0 bytes code/ui_functions.py | 9 +++++- devnotes.md | 36 +++++++++++++++++++++++ 8 files changed, 150 insertions(+), 21 deletions(-) delete mode 100644 code/test_upsetplt.png delete mode 100644 code/treemap.png diff --git a/code/clusterpurity.py b/code/clusterpurity.py index afcebbd..acf8657 100644 --- a/code/clusterpurity.py +++ b/code/clusterpurity.py @@ -14,9 +14,25 @@ """ -def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='black'): +def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='red', neutral_color='black'): """Build a ``link_color_func`` for ``scipy.cluster.hierarchy.dendrogram``. + Three-way coloring, not just pure-vs-not: + + - ``true_color`` ("pure"/monophyletic): every leaf under this link + shares one label. + - ``false_color`` ("bridge"): this link is impure, but at least one of + its two children was itself pure (a single leaf counts as trivially + pure) -- this is the *specific* merge where a different label first + gets bridged in, i.e. exactly the "bridge sample"/"two groups meet + here" point. + - ``neutral_color``: this link is impure AND both children were already + impure -- i.e. it's just continuing an already-known mix further up + the tree, not new information. Without this third state, every + ancestor of a single bridge point would also render in + ``false_color``, painting most of the upper tree the "bad" color even + though only one merge actually caused it. 
+ Args: Z: linkage matrix (``scipy.cluster.hierarchy.linkage`` or fastcluster's drop-in) built on observations in the same order @@ -27,20 +43,26 @@ def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='blac Returns: callable: ``link_color_func(k)`` as expected by ``dendrogram``'s - ``link_color_func`` argument -- for link index ``k`` - (``len(leaf_labels) <= k``), returns ``true_color`` if every leaf - descending from that link shares one label, else ``false_color``. + ``link_color_func`` argument. """ n_leaves = len(leaf_labels) leaf_label_sets = {i: {leaf_labels[i]} for i in range(n_leaves)} + is_pure = {i: True for i in range(n_leaves)} # every leaf is trivially pure colors = {} for i, row in enumerate(Z): a, b = int(row[0]), int(row[1]) node_id = n_leaves + i merged = leaf_label_sets[a] | leaf_label_sets[b] leaf_label_sets[node_id] = merged - colors[node_id] = true_color if len(merged) == 1 else false_color - return lambda k: colors.get(k, false_color) + pure = len(merged) == 1 + is_pure[node_id] = pure + if pure: + colors[node_id] = true_color + elif is_pure[a] or is_pure[b]: + colors[node_id] = false_color + else: + colors[node_id] = neutral_color + return lambda k: colors.get(k, neutral_color) def purity_summary(Z, leaf_labels): diff --git a/code/paramfields.py b/code/paramfields.py index fc76a77..eb0100e 100644 --- a/code/paramfields.py +++ b/code/paramfields.py @@ -23,7 +23,6 @@ CHECKBOX_FIELDS = ( ('PCA', ('ui', 'checkBox_pca')), ('Dendrogram', ('ui', 'checkBox_dend')), - ('bootstrap', ('dialog.ui', 'checkBox_bootstrap')), ('MZRTplt', ('ui', 'checkBox_mzrt')), ('KMD', ('ui', 'checkBox_kmd')), ('mdguide', ('dialog.ui', 'checkBox_mdguide')), diff --git a/code/plotting.py b/code/plotting.py index 06d1b7f..1467cd0 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -822,9 +822,10 @@ class plot_dendrogram(ui_plot): tab's original (pre-purity-coloring) appearance. 
Either view/coloring combination can be regular or bootstrapped - (PvClust), depending on parent.analysis_paramsgui.bootstrap, same as - before this rework. The purity-coloring math lives in the Qt-free - clusterpurity.py. + (PvClust) depending on the "Bootstrap" checkbox in this tab's own + switcher bar (formerly the plot-config dialog's global "Bootstrap + Analysis" checkbox -- moved here since it only ever applied to this + plot). The purity-coloring math lives in the Qt-free clusterpurity.py. """ VIEWS = ('Technical Replicates', 'Biological Replicates') @@ -836,9 +837,13 @@ def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): self.currplt = currplt # Defaults match the plot's previous (injection-level, uncolored) # behaviour exactly, so existing sessions see no change until they - # explicitly switch the new controls. + # explicitly switch the new controls. ``bootstrap`` defaults to True + # to match the checked-on-startup state the old global checkbox was + # forced to in ui_functions.py (its Designer default was actually + # False, overridden at runtime -- True is what users actually saw). 
self.view = 'Technical Replicates' self.color_mode = 'Purity' + self.bootstrap = True self._build_switcher_bar(parent, currplt) self.plot(parent, file, filtereddfs, groupsets) @@ -862,10 +867,16 @@ def _build_switcher_bar(self, parent, currplt): color_combo.setCurrentText(self.color_mode) color_combo.currentTextChanged.connect(self._on_color_mode_changed) layout.addWidget(color_combo) + + bootstrap_check = QtWidgets.QCheckBox('Bootstrap') + bootstrap_check.setChecked(self.bootstrap) + bootstrap_check.toggled.connect(self._on_bootstrap_toggled) + layout.addWidget(bootstrap_check) layout.addStretch() self.view_combo = view_combo self.color_combo = color_combo + self.bootstrap_check = bootstrap_check parent.pltlayout[currplt].insertWidget(0, bar) def _on_view_changed(self, view): @@ -876,6 +887,10 @@ def _on_color_mode_changed(self, color_mode): self.color_mode = color_mode self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def _on_bootstrap_toggled(self, checked): + self.bootstrap = checked + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def plot(self, parent, file, filtereddfs, groupsets): self._last_file = file self._last_filtereddfs = filtereddfs @@ -915,7 +930,7 @@ def plot(self, parent, file, filtereddfs, groupsets): data_for_pvclust = data_scaled purity_noun = "samples' replicates clustered together" - if parent.analysis_paramsgui.bootstrap: + if self.bootstrap: # bootstrap dendrogram pv = PvClust(data_for_pvclust, method="ward", metric="euclidean", nboot=1000, parallel=True) Z = pv.linkage_matrix @@ -930,7 +945,7 @@ def plot(self, parent, file, filtereddfs, groupsets): else: link_color_func = None # plain black dendrogram, scipy's own default - if parent.analysis_paramsgui.bootstrap: + if self.bootstrap: dend = pv.plot(parent.ax[self.currplt], labels=textlabels, link_color_func=link_color_func) else: dend = shc.dendrogram(Z, ax=parent.ax[self.currplt], leaf_rotation=90, color_threshold=0, 
above_threshold_color='black', link_color_func=link_color_func, labels=textlabels) # default leaf label size 16 @@ -983,6 +998,10 @@ class plot_ordination(ui_plot): The actual math lives in the Qt-free ``ordination.py`` (PCA/NMDS/PLS-DA, technical-replicate collapsing, top-N loadings selection); this class is just the Qt plumbing and rendering on top of it. + + The switcher bar also has a "Collapse Replicates" checkbox (formerly the + plot-config dialog's global "Collapse Technical Replicates" checkbox -- + moved here since it only ever applied to this plot). """ METHODS = ('NMDS', 'PCA', 'PLS-DA') @@ -994,9 +1013,12 @@ def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): self.currplt = currplt # Defaults match the plot's previous (NMDS-only, scores-only) # behaviour exactly, so existing sessions see no change until they - # explicitly switch the new controls. + # explicitly switch the new controls. ``collapse_replicates`` + # defaults to True, matching the old global checkbox's Designer + # default. 
self.method = 'NMDS' self.view = 'Scores' + self.collapse_replicates = True self.loadings_df = None self._build_switcher_bar(parent, currplt) self.plot(parent, file, filtereddfs, groupsets) @@ -1021,10 +1043,16 @@ def _build_switcher_bar(self, parent, currplt): view_combo.setCurrentText(self.view) view_combo.currentTextChanged.connect(self._on_view_changed) layout.addWidget(view_combo) + + collapse_check = QtWidgets.QCheckBox('Collapse Replicates') + collapse_check.setChecked(self.collapse_replicates) + collapse_check.toggled.connect(self._on_collapse_replicates_toggled) + layout.addWidget(collapse_check) layout.addStretch() self.method_combo = method_combo self.view_combo = view_combo + self.collapse_check = collapse_check parent.pltlayout[currplt].insertWidget(0, bar) def _on_method_changed(self, method): @@ -1035,6 +1063,10 @@ def _on_view_changed(self, view): self.view = view self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def _on_collapse_replicates_toggled(self, checked): + self.collapse_replicates = checked + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def plot(self, parent, file, filtereddfs, groupsets): """(Re)draw the ordination plot for the current method/view. @@ -1050,7 +1082,7 @@ class follows). 
self._last_filtereddfs = filtereddfs self._last_groupsets = groupsets - collapse_replicates = parent.dialog.ui.checkBox_collapsereps.isChecked() + collapse_replicates = self.collapse_replicates raw_header = cached_read_csv( parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv'), sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() diff --git a/code/test_upsetplt.png b/code/test_upsetplt.png deleted file mode 100644 index da8893e51d26552daeb18fb94e66721d2aa467af..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 41302 zcmd?RXIK>L)-75M3JL-?2&hCG5y=Vyl2KbwK+pyW5>=8!C1=`zHlQE~C_z9G5J{3H zVaF>XIp-K-mfux1B^s)uR0x95 zT)u=SAP8AA{4(yNfL9nUI=q6vBpom6IBM9LIlA1mH$|@8bi8YA=V*QB7N@hRy~7(?}sJh3JzQd<*xZvLttdeR&S3~fjAynL#hhAL@y(G0i%v+L1 zlJPvQ(v&oO_IT~RsLaf-BP*37g-%61UDG{9>e=lJbDuc6(kC3e(pS8SdR8{p^=Bp= z{0LNd^e5EAaGI7#f}fLRw?wX>FPZS1Xh#3WJO7_}A=DiA^E_UYDd@4;)UZj@i8kd} z-6e_c+y)^lxqqD5^hqwn`lHVB%kO=>2zypmZRABHQAa@fj*&%RrAZ`u@~mSY36^@a zHX=3_x9MX8@`Nfy=y2$d^y!Eb=-*0_$8ck~kmI;A(j^9d8#4T0D#?UY#fx3j#QU2! z^=aTjaJcr1$8X?coXlnmzW@EW{UbaQi~QQxk0bISy~xLFcqzUF@6(-Ar`Z#w#)px| zhy^!pDU48U=CzF&a}#}#P_jn4atRzkmv<@sMQqR&Jnzu-Ve||~X^-PBVkXJ&UgmZ3 z5uYkNZ)C9=Nf;ksUDv0LV8dg}qJxRcBkeJ1cHB=)L5`X= zR}6ZLj{8$Cu*Q*}`g>L8^aJK8-K zvqrDaYRn!C1Op{ zsKT4uZ1G|bCwsY7b&;R;`p5r*Ki-Sax>k%IgW~}MZoEuY@1UC3O&R!Q! 
zzz=>T6zfquswPnAy|BE6T|uwd+SI`X!Ov zP(pj!q?I`FhjQ|8$9EIz-W*@jNuPamR8)Ki;(Uw9kG+-FPP>u2&LQP^lKa@P-?*m@ zEIvLyJ8v%rxW`I5RP?=d+dF_kNdEqOzo({T`DJ80*FHa_EIVc8^364;%C@hK!j+4Q zi^H(MKEbZYdCFVD#wbNAZF_ER&ZnZXat1@Je=Xf$lxx|lfb@}36fb{Ilv0MhTi{h-GhP+FtRfCi=mC*~j5I`?=)Kojac)5{d-dU(5TH z<^=^c@CgfNIGsCp&LQ&5oq4>y{k)*tpdSN|<;ME@37hUurv=WQUA2Pu7;SBChIczO z$Yy;1_U%x;jf|1!+{FTwmqM-SI+=ggkBV8odllZEs_Wq6>FK#8(dU_65h-Tnzdc_g zY;fVig&i_tPQm>73amY`Y)0^m(o9Lrx|f7Zq*JxcT+5L%wa#`okvi zM7M?Eu_$pH_q8x>!(<7E;aUrp+jD`>p4qS*I^^j2<oQ%*oTIPs^M-HS2im)*lDu%H5dVg;6F?Tug25Kt)JB#{R?l8%FD~~386Ev zy#&ptK0PBSBjq(U6T`m#Z{EC7O3SUQtu0QoQS;tfYNy48h}(955|hl8^;*xFfqP*X zL?W%G8_{7x=G#aJ*d?DaMbSM4kzVi*dIz|JwmSe*E|X`Djud zk|mk@%-`RV9D$`P)_u^FWmJ3aLu z*<^9DLzDt(%P{iEc8qrzsZaayQdmCmc4goc1ug5ULJ+%bAkR!B@_n^U(L7{uc&U*ku|giL2CUP{*cq?nw_1MC@wDc zel1~Vc&FxUug6SDo-*&u@>G|(?w^|?Uh9h=U%h(8PEJW1O~|omI)IG!6s^c_Z*R|S zZLDyJ7?%^>;YJ=N<76ur52#}yBVAKAs2@^sqVB~6lTLCR4Bi50@XgRKm`^Rbje@9! z#VAi{X=!_XjO+#zQoPirCuUX`*(!YcbmpxaH*S0+y<_Gj46Z(;ll$cU-Jh{Wa3nr3 zAYj2`GP81>Rm$;w>(WH~1!_h<$2T`}Ehn_nZiwYucVyj3bPnU!W@l$+Zik>On`%nY z&d472+CxF%of0~KMOk?~xMz}MY;5eLLE$0~w@SDS6AOz+&BYK{%rq&d@s`h*FJBI$ zW)|{X{}IN&6-FqFYzw$_km!oc@FmFosIjVmvgRa*F%)F!vx6_x#q~ZKe|~>4fXQj* zXM#$MrJroA;(|Q>9S2bz>8=aDgL_Lxgq&G$-rZdaKu9cJGnw#2K*?jF;8~&({u^aE z25|@@3@mh>9{F21AnvpyST03X!%ugA?+`RJxbyQF)z#H2o8pxmqG`TizkL0w;o!h< zSk#4lmA(9s(znm(K|G0E&G=9bRvFjEGWVtN6obqR$OOx+AFiv^6>uW~B6LcBtar1- z|C!qH|HGr~4I$+m#D;y3e{->`5SOKf54m{!9R4l-ojuc5DGry3vp2y1!JCAVRttx9 zp;Sp(`y`Fva1tySj~%2c7ubV67#-w;=*Mk%*xv09>tE_GPB9xPe?aw5^wookHh^K z+5Joi!BT?QV-^6*#ofqN^8(qt)hkn!K56mNKwm6PkU4k@HjXb;HnF)095|fIS=ZS( zd17n&FYT$$T%%OjpC9;_!`K3$kUSDf*pFx-?*(xfMCfQr>N7s(Cu+|bo|Xxd%`yjn z(ap&fFNA()jr9dYwqC^e)*)g%LO@!wex@@KxfDyz%o?RF3ff$JcYsKLD#{jse3}(4 zO%C7)2i%tf4<~4TAn>u3+am_}cO>1SQ=?K7C$sB(D4tWr1zpVdiep$vP@|3}k-8s? 
zHRClo{i^5?HL_oO`^ktO5JSqVNApj7i$^B+d_0do@uw^*Paiq;z2y?G(`9@w`6)HL z&~JF{DaEYV$g2~$LQ&{q-afDtCQc)Udk*d~Xf8gYAe_*mrookZXG&y+_PMqf{ggZsi*jbU+jHuRJ zuz)r&=%d+gBUPpLxBMbae*4>5{r~8Nc^Yi;3EZ=b#~XVx4`Q(xSP3+xnBxZz5y=s8 z%zHBdBTigHk=R)rj;|5Qs$eCC;7Uv_W|oKe5<}`3m&eHbe*6)NR|L(d4J%B<4&`Je z#Pd$(v>hU*&p{7Bfg-z1PYL2(d#i^Vc_Z}H7kWb`I8;@$0r|m@?}he=P|+zS9Kq%{N1_il z;hz}yuad%EW(wYe^`j5|p}V-?Px|Ix<%IQP2)>5;HgWBS<Hu_|OIPysw{{<6DarYdrQ`P69HCA`p2^FMJHSsfYcoP&4ciEm%Oz2|>PF7APrwxpPtwVx*GuVVbu8MzvDDXOI z@-98$4Q9d`VfQp83m6iQ-1HQk$HPc%X%OU5f)A3VWL?kOD`%iz>8SD@x zRrv|5+pqmzJ{0|�cF~iMrcf)jtYV8oBDf*LE6eD;w0k6&(2n+x+WY2#)!OhR}EG z9K%6b;K0_DOmf`YxWAlSIXPNUB1h*r@pqg<VV^#w${TArF!P0XAGXGOF^9!F$5WyTc80>?2DkdaT zTeHt;|7gTHLkae9A#xoG*=!{$;tj3N7P7nF-oK>SP6!l=+7a?5%rvm;NQ z{E}~}=zw$fkXM}2$Cty$3~{)51$1}zT~n^Z24^Eo!*7B#bqW{@NQ;=~Z$d?kB&f>O zMkA;E8k_r`?Y)axR}@K<6ZWg}Kq~yIxObE9siWoX(YGT>qxE0W9T`dZF`mn&oE&@c z9TSmCh;&Q~e*;-X%5lgT(nfyxL#UKplXEdiA9L#9`ePw;jOrtDI}bIW3m!t>JcuhPeCoePT4w5J-A_V8Eif-=#Qk5v8#g`y-w9H(EcPWkf3-o@4 zG?0Df@eatDM=_SRYP2?t2-VKAQX&c0tw@fa{Hh;mZ6ye3TG{l zbmm|%^Tk0!m&)^a{9YC%I7+-+|@AeVBk-U=$Kc6}cVWJ!CRp6_6 z2KJKRvx&$mR)KC{rn0szm+WJED+XGHh<{+vjgz;zhr~qpV7SM zslb~{v(tJxN|MKWy?HAw00E2_WWqpf;o9md3c|`W>7|tA{pQDozN?0k96M!{AS948> z>{I{0zKZ<{Z|j$y(}3}&zHlV;$N3LHh?@+Sm)3Y6QLAyOnHfnXKXskgspEAf7JKb; z1(fCU7AR83BE1v`$@-5I-y*$xXOl>wPDYM+yk_IyvyvkMpB+jN3nS3+Cs>kM7z_CH zr8U7f+~3Vd2GYpP$pq&};WOLp`{PRV-ogozp^@o%3<)&P6sp%z|7TD(f5(iz`d+wP z;STS~?}u`KaLkPS)k&NoGE8=syk7`WCwJXjepjMG5n>W8uI3#%$o5griV*Lc)$A>Q zshnJG>*o&y4;pV@c@qv1nel^Mh~S%%h|+z=C4}+kdJnmvp$UQ|KoN%)IwEc5XA<7S z0R6#IxGVx*&Hs`=NuNGA4L6??mgjWQQqwU)C=HdUORXFP`Ny!h>RlrM@DrolrjXns6NBLlOjGtHW3+2oG{S?*%ZQY*GV4= z&c}F?aZ)MhA+blc8t#sZ!tKnz89|A4W5re@jhA?HFW^Nm@2w;G6ExH1YeESuZ(f)< z@Z;an6St9G(Qp7AHidTtzG{;2(ej(f1x)cB0qVW8v_uF1uBSE0dq+xH&UKmTZJ^_j zvu-w4F)=aWHvj$^jcE&<2m$}YEOS~2dnBULz>|WLrd$%?Aa}jayX2cG)UxVU{locX zT+#@?S=(|UVce3*=0chP59Ej&jZ!L3G~zV@bAQlQf2{Chc#LvsF6m7@@tec-jqaUjOJrCqQ-xA29APA z*A)mJXbzBV`@7K)V;B@Lc*YMHlwwAk`clW=Y9MC|C*N=rRgub2!Wb@#Re>3MBr&v* 
z4B9%goQ+@BMX`XRqvY8povV5no$-FOj@qU6)fc&mjE&sv(s2xCHNia-NFZMVVY1^s z$#Xwv^{70%`{L?7qzB{q9@5eow!OwYi>xgSZ<5MbsAYY74{7))C#&G**@^dX_WR3^ zB1@S6Xe4k!@u5=W{Rf~3Q$d=PlT9dR%=jHcdRf+ON!7?QGMW&&-YXEUgS~_4Cr1L1 zcjP*B#_x5%*jP#%;?t4C`@Al^%tlU}ZxDs{$%S)w;b|2bI`384`;xlJ`$Guh=WV60 zE7w&_N$6C6usK6)6VH21VSK}O;27@hCQ9zR6&^w8F*0_haA^-Pi+As!SVw`9;4%ic zkusCaX=db#{$Xl>VZmJ(=D)u)?2+eqMBy@X1t75__3lys-1LeuKU|ZooC8TQI&k-k zV==`x`qr*5NSb?RnTXf_LQOFeOp@ud08M1szls7$LPDRE-ubb2)5U=qQ0(OZ3bJPs zTJ9ipkTc`N7mFKnF`jgsU;e@#^vk#)^W(K!-E5Rl!E-|ry;gb**NT6aN!n8=4YfKB z&3{AWxcR4~=m*eooj_Wj4pHO3b=K*Vs?;`?r>NgwPido0P!3yfY-q4cCRw}Aex}^e z$x~5RR|;SfEZnnquh+!TP;TPYH|GOp3Pxg%>E+wfh}bb7Intqq7dKh5XnEhHA>?6? zS1Si;3O3;US)wGgw>mUlO86JhcwqYZ+m#d@&HerS_WM11Hg$-e-u}3Nfb{}UI?^|+ zw3F0cV>VW1X0(8yUVC;_%pdsMv`?lr;pScWc1ByHP4U9Mj~|;MKg(cA_jD2sa=2h{hd$!cd(K)z;jeJ1CCV+v>A{c9RH$2 zns$|wlbfBZ_D;3={9vzGD8cX5 zENP6BxQ#YZ(?^NV1=_+MmL{CV7Bs@=JdyIgzMaQw^Ut3@KY;K7NoeofW!IIL25QDi zYqCa?t)pQmKK(OVjrA}g!T}^5-FC`y#g~c~@b8Vvw)Z1Ifh!3v41suY?h0Z*Q1Prk zHSe{Ym;0}wnwK*vz)P<4UH_o(V*ehmhDw(*Jvs;tW)hj@L-xi6kOU|j50tw?>eS>c z0jg37gou?*V6b~)UH|wEOFK{QJ1TCiQzz*#Tzmo-fqR?PxeQA;i6F4-#(tDNSah=} zX(&&THL@!lB=RDA%J(5F$Y@;Xdd;)nSVkTB&DpP`^xVpuxp#kmX#y%$N6u}2FepVc zdEVph-B~P1Ow?_81+H^%SKeHB(s>BD(a~3@Z_^e^nc>WLll>gzI;5K)_Zv|r{{%GC z;P6KhRgSn#=kaJ+59ifg4F%5lJBRn`@74LC8Y5xt^zL{@KF!s-PH~i>k4G7LtL{&8 zF6n_Ut8~%CT>2(dimLx4LL!wB_*(0ZVHsU*I&&^tG$&pmLx2W;s{)#d542Y=b*zd& zmzn(2PLnTd2iuD zUc4Yje-TbOxzh71(>+n-<*Rc8`PY*PL++ryNRugq@m*IlDs<$v7^#0v6UZ*x`38<< z6vQey|IsuSlp~sFK|w*EHl~Z(B`jM`cH}mYs@!(biG|Aeen%e%*-46i2BaizBB}50 z1%13rD!K-_EiElS6U)?JOSRsD02M?yp*K{5L)G&o3k3z&3{=~gKfBrn}%4Ekv z&rI2lcR;QjkLs&NHQpfzIvzvUOD>0Y0gsP-mAWy34e2Cvy#_4=iS?0MUW6)xZv9H8z7^DvAC9Edo> zFm!%veJOT76;7~xIX>OlZ2u}VGy-}`JY0eT*`({2S5~wEo6>>T?@B!HOXY(?C+4T?Gy!T9Q*95a&aI ztdc)FYijsQKwjv|vXpS0?eCmu&)_(O#b$m4@o1nSR^Dg}(DJBpZ{md~v_a-3CWnBD z{k{_3MR)jAzb2>`?V#@2#HD3W2WQVx`vbTHAu;z{4&v9ogD_jPzPft6;kC3215&*D zCtw3rh+4oQV(zt2C(%^~FO7o6;sX?FZY*43oA>tS8uyhhkY@5fuPuylOtvI9S6a2D 
z>TZuE#a6IN+MC!{hj6>3frNO`imPK7WWAh`if1fAa0pwASUE2yvsPQU%QOJ!}c3u8GxHT8PFPNHwqtTzu-)RYXPGQ(SwzcN?M@_m}+974jwherkmURbV6 zpFDYz8${5C#g&gYzZ#~1VEkBJe(TJM6gkBpcCV8lTz&(Imm||O7Q%%3AaHkHR+|Ep zalT z|Lxm1O|p;j8TVk79duZ;DMI?6W=o>i zd9~B^RrU4t0~Qw@S65FZ99uzoC%3|INg3C=;$l9k@~5R%*x;h;~-euBzFTcArjnZUgt2?+^#z=jk!kjg*utHDBIEvHo}J7vDI0c7x1j~NkKt$5P#zLdJ3IH z2mbtd07{cnC)``#Cr`?-P}MkgWSLs=T#3}Y18VX^dIpBC`cte zWKhr?b=;X7sN{$|b@NLlv#{YJJLF>z$ay45$I+|pKotx%C#iFkfsT$F?xNhYryC)H zV#s7~qK_}wEhb_}#nk<0h^>ucEn!cG%KWSDx>P6*UIvYyA&gK;NyomJuMZOaCqAes zqUaAEJc#OVZalz4QyevP@M>k@p%&YSE&98- z`gMM7_dP&hW8|b>o^?3a9KL>+9f%|q?=e0^)J3Gu_f>s2@y&cnLLT}0zTQKGg$@-n zlJnkN-952CGc!Kk0Dl))#4JOtZ8jjfwqk;Vv9sXA5EC^jg=#a;mx{6Ql>J~;*=wu| z)THk2Zf*&HA?|XBiP`x5r58Nv6i7Mj{}NGA(XCKIeoI6&2&O_vM3BtkOk|<7@UHe&majf1iNc>wlM}f;X(v(`*HN#xUlTe z6~_362^>8SE*}4_J?5~VeF_BOY2>5#*80&zQwZx%9#A!EAaNjmXKQ5`8f>TK_kOtP zo~oBG^sU7icBQm(xIlpw1mJ~EFl@{Xy);VKfL+i&FfhOcF|*?9wsr|MJV1d`Rv#si zk>hrtr^v;wx5Q&*81{}Af&k7F>T+st)Ivni1^D@moxa;&&YP%v2-Eq#tm5Ouc8tuvxHN)gNmIHMk*#sTKz&a? 
zov{G$1mhZ6v{#EJJ5i^Py|a#y-?{RB-P>6lWt;*Ez79#9z-*bn9D@iRnF)6i^v zYj0oa1W!yK8L_IW%9&p~-BUDWnN8Yx(-?A)eAKqw-F^jycG%VMTB<3i?7?8;ctHoX zCmp0rL%ULgp&wySD!+fv-2(O8V{yE7BK1xI$-2tYNI5xt*e59|Dc*7HXKbi|?!1n* z^(4>mL`5K*9?0g&4_}0Y@G%SNuP%aZNOmB*L2jd2rXB$D;?|7M&c-k}ks9VHP7mHd zw@e14CQI!}esS?UFStnr2V1DmM^n$D5d_MOD{>)}Ag^;d(S!VOQs|GR9331%nb)cQ zmhM3!7qU%I&J__a?u8fx^P}#drl$c!pn}95tmqMLf$AP&rl$@If4 zl68Qs`Gs~jc{BBBe8Ccw8FqX(ax5lEST1*HN6jx^x-{FHb4McxFrw}@?#gt98x&>D z&-Q#|kg7Hy^Q!#%5+j3f9w=uB>uXt;m_ZvmZ&T{0Rh1x4A%> zDZ73`%MomWah_b#;jF?Ueb>8pKeqcb@(a9hdfWB%sMrseiMF&lpY^dMDMP~o76}dI zV`5^hrx}z-pp!D{5OrA^Yfkc#avVLl2tI~9!^~S3`FS2GIxAnke6QMr5?P+^*;xDi z^@$B%Ch4zsynA;vl-WQkL*&S2=@(DVh&FP4l?@{V7W?~95EYPtXJr?j#lB3-XBaRIkU5OixLA=u3QP@iE(}VApUli8C2gqWMpLa26i{{ ztUDHAPx72ScKoeA+6uBayce_O!TH9$_*tWzd`M~f0=@z>y1$<>K{Lm6Ux%-`Qc)@k z@Ac+Rdu&H+XsG5{5s?tF=0VL0)>ffpw={#I<$!|sZV8@ooTWZGz>Bcgy(&$o2QO7R ze}Wp^KWs85uJ5cdM?6wY-fxr8l4O=5bW}d=ADVP+=lZ{EEq{p&xyI%;T@5mYnPNxV z<3HmuPqmaw>@wF+5)UD{sn$2B%dzo>M^`hn*2D8~gbLC^M)5b4Nkr+DA@lDT{d||( z1L8yRnBt_+qqhqTdBbRpC5b{f!Z&Nss>kNv)<*6e;AE6qlW3=O-{X;7a*rMxJc2Uo zKyluoi1Y1uJA!}r`eHVM1f6e~zu6#X_@s30M+2hef9b-)l5(<5MMiv_IfL#o+^~dA z0b=a0yYXM7(@sy0=^0|if=DG0^=?9F@8WmkJWnQ(z12%7z?la*kmJDd`f2Rc(ZSr~y(n%WI8QQID1 zuLgt9MnRWz20OhTZo*#p9^H4L;4S1Os7>pDzFE8y-er?-HaBF41_doBj>(o zp=33cs2mDT6?0S?Y*lwq^PfYSUcTBM;4~;4w}q> zk&t;Ea?lLg!ZF;ud9w^Ib|Bg+o(Jau9WaTcKl#m%;WKg-*bDyf38>>sGFw3WBUQguktm+@HwH{s@& z4$S}~GyMrp&9K;Y?p8V2Q>_%0lvdt@&u$jlir%G_6?5K0!BM-mfgI|**2@7+_0ma? 
z@iFh9LA{LJh04sh#tD3xG!${*7Y(y6Er!ol0~sh~v3aB!ZhkZ5XZ8+ARuw2wWHzF7 z1>d&&b&IJ*}+SWK#Mui z0<_~bHLu!O`5g=brT3Sz`oNc@EOf__sy?qTO)v*XN3%YMUj%{9kFgNc304Fpg3`eC z*52nj@J#Z;;{$8OP-rr+do4C`fI(?9T-d0$y`e$)9@vo=u6X6%X-;g0rL=^jTbV;S zT5nKb&ut8hQ10?fUta>WBtBl-Ya@EyOEP{WKvQh+*R9#FbQuYWiQ(c~UQmTFQ8Nix zV36S8U|HC{_RC<4m2`u``E#t-atAf|V~=p*jzI43m4*Elkyyk*rtn(IDG)m8j3%fO zW6SOhD3VAfU>DrhgCfNZUF-*7?Y57(gOi|7aK)U$1}8E6mzVL2yS@Dflo5qn=atL9 z%m|!A>0fc5x!^HnnUA;>KdrF2lPb!hJ0_v|41WHOaSYD!X{XERqZG31L|VQjqJYxP zhxoN7MdjtoOrcJ-f}A+~#Mk%P=4gBnU*Q_>!6r6;W=_Zkl^z>Y`DLJJI z+I;!)MUes=nf$m3axyaSTFWl@x;-uLuML<2&3Uo6x7X^y!-r+low*Yi@+KvKmNU>w z5P))4z_$0l!y1*?P`vt2)~E!B4`oEXXV*#@*x1-y=Lf5Yp+(xvhB!4BZ0F(O@!;m> zc7r{#nhu`z<&}knA{poqqY@`>6gu7+1z7NKoa%IXTUJ&^Muc$52e%uqVU|htuZ58Y z4^zOVwXF^al+2gHM(Mp^2Np)P0v~S(*S2fduFW`4ck6~j)tn-D+HvyN+(QVG*Jq5{ z;EqH&sME2_d|HNOq7<{~T+D_IvsDBn@O^dJr4}NWQlO`WCAuvtMMXrY(tz>ssM)4G zaQN={S2}rp37})|!+Th*RaEgNY_P3yKU^NuarE+uie_I0`YJ6@`N^Y4o6!*wW7LQ* zHS@MP+M9zf(7QLh6h7C{T3N~C2E;{_YK>Zq%-Un#%+iVUq7%oDAJ@#vvjH$*iI(@- z{tTcN6yWcF!%iTO_xcCTvDOsrvRBd3#h*1pLS93X`2OsO=&-RC5MxGts6)D`3|fd= zD6}5XM~4}j*8z{iJ4(GgW-#Ow#n++GJ7eV~vrsFlQt|Z2!gp8+OEs7r@KHOB4R%6@ zZQgpC1KrE!>E-nTGURqq?h-IaN6&h$&fW($Vbs;gs4o;~;48VNib4NA`e3dcgDKx! znY8ZANlr#*9bklDA!=H0=H~`XoEs)YgOf&`@TM= z@kj$Qk_Dv=U!|@e2EyPC7|j__Pw+SFX)L2Annq3eC3x z&o5Kzt<9Z3kel#>+Hh&HkbWeb;q&>{+CUy}0je&+b<`b&-N%a>yB%qucU0MuCq0@x(ub5w8if7jcjOP!p0reqd4+5e7xx>aNQ8hvZI=!`R=%hC9!bZOU zHOG1#Fajx{LvLp9XE^aFs&i{~AczGvxOdcHuJ zvF~TcB2{O<5%mZUP#HdU1%#> zw>Z`u>jf~|i$ORzI9R2ft^dH~npJ%Nz8AQfi5%G1RCIJcT*r?Oj>7J3gHTL{jq-wm z`^1UIloS*((_Q&9NgR;5>7Z8`RRja$#2W_iF)}jZlB^mbavIo=qmm|7&lx0_I1V3v^*(f77R z9AtOkcD7r?E)j2b29DwY1rhE~r!2ktUkA-w&jE;RJCodW)cp^D77_h&`|Fo4YSSIr zlt6IvN&{frF6b!*38sug(*F27;IdU7(7QB=LvU$#^kB`Y0YNvV^SuD%@@r9wo8O?& z(NW6=9SCTKqSyvvWC6n|szH|;E8`{-hi)H~O{_;Jrb6bUpj>uhCe|vkyFKD&|A+V8 zWE7Z~TG*MHD2X*(y;#ZNqkj|hEJh3`T&OFEo^K$nNKg_%k!6>7=H38TTnX=Yffcj9 z1aG8o5uREj~z2s<+tlxQxu^GoXo`p)KgqfXe$RB@yc3E6Grm#du5? 
zjOf6Q$=HTf3xjfT`#2&7x5_T{yUagFH~`^TZXKnWLFYFBQT=E7%66W^shf;J1*GTy zKhau0p^i1FEDgc$r9{&U?gA8K4P}czRX{QB={j6a)jSXMY3|d&z-gOaXXxeQG5rvS z4o?mo5vk1a1YVR2%7IQh_zy4Op7f5NJUMCtJbQ&8WZUqchY~{b2|xHnMHhxG?tyL* z^u3}YuN%~ymD<|c^WNUxFKa@1vo}z6z`2}=NfH>i0^<;H0Hcf>x#J%Ib-GnUK>>Qb z-3C)H(&z|>k=IhI2jn`rEg*t}ATEaL>;usf<6C8A`&Qr1wz=449$XhNb#)iOK4p6x z_g`F^u1lqTlE-{CPn4j}N8=XA=!Gt0KZhO^4crR{Lfu6NE`=Z9bz@+dtN9;PR^gR= z!-f=e-eS2W@x{n||4M@**l~c`d0$Q+jKhgs#!b ziWyN*lpmG8^^FuW9r#@rM#PFC)wje0AG@IQ=Uk`+B%$}5)O#B_6pkS(*_Ov~t#6dX z{=+OK*jY*gs^>M(Bv%iB+APY-E-UE^9KPby!>1(y=+0YOS}rqO7}0=PIDTCSg7q2%><0i(#cWb*Mb5BI_zLgRCm^ zihcRz9DkZ`D~6GM8-);EHEyypn z2iT;})H`atYHVy&SAHqvwF*UTZ0Ed8b93{(#nEd%dY!{C;pX}wNPW8O2ZX9u9@Kh$ zQot*bJpr_4)cNr^KoKTz6pj<^uK49-PVMK4m^i={LD5JHb0NEf8K>CGZzEJDA|(Ky zjldSKj6y|Ol7l7Ske zPZen9`KL!j!)aL0&c;UHl5$^spVao0K9~V*0Dl;s1qY!=T58w7d&s$} z9qR&qRokh5fS~?69`ZIu7|IBV?@(cT!FXPWIZ(r1NZh@Fiu{~w$~S>Z9|D!G88*`5 zPhc5Roty$de95ik1a4i5H0Jfu2FpQ^;O#nfkfRqnb zzfjo$-d6=kmaUYw>A|@|D+k&q+Td^2gP>RX!OZL z`xX|>NNELiRu;46X0eGW81l*DUg>sCC?}`r=FsG3Z3ey!4+kI5Wp5^vi+WVI1!(E6eriivr9I-vngd zV*nnQz|=P%=^87cf%Eq!u$Vec3bFaam^@MzL7RKICuwqL+NJDl zBk2zKZc8!+7$v1OZ#An7WIG3{kEIs$&7+`_{R+9n1tui}2-SSF-tUHjEC9xMGrxYg zp3)1ti*1Me&gKFO@?JCf=QCKr&96Wd(AhP{eQt){^$u_rF;M8_FTjW0(6Z_m=(M3W z;Fts+KvKqg(=`_=)*DFH6#z10loAXr`8(y4^{*+P|M2pi$AmtAewB=9*_m_amX;0L zgOER=&MrXhCl0md)%$2@>SpKWE~}}k3Ix|A zspDPFq7(6^7yiZF=IG)H?^wkz{0D$FX^ry=Aztqyw|k-nbC;T!wByBwhKu1 z(mgOgkkk+@sUgb8H@*hGr_yC0?3_Foo0PhAm#3ztytrg#ORgIJ;d(2LMgU(U{#^;l zk-V{52Q=KmBSBv(vaTUi zDpT>|7i!^67Iya1o4_B^2Ee%n41p%XRP~p@`bjxhF7Ntpk{NCmqdr;RsD(xRLRN7m_%_1?9|HccP+xGHRsPxeE~ zcR`|D1_qE#@Y>sNVNjKJ=R$lWp+J`nHh?Xg2%gvWL1_B2P)_z2&kOk1teB2VC|pT} zfKe$cG~pRHBUGVO{W$~3oCYwy+zz=&?M9b3^ldBZv2t7Ss07smJ$XK{^!9IHOZdQA zIMDzz+|lw5!>UbSSaCY*GOfRbz7G&}$cC#5#%2$TS+-1|9j`15Ye$*y2XjG=1Jr>A zF`o0)8r9X+=>CCiZ|halLknPRM2>*O^sx(@vHzNFEufwy_&w-rU$ULOYg|cACv(aU zNJ{mnkTKX6l1x$4Kr*S*9$=SoorW3h8+5E^PZ`x~%#5KoSyN%MD_s_DO8w-uU>Zz#$7B0W_yXEtn_+`4tE0OdFq!BeA#50?ytxn=`&PX>zPzHqgQf{+iw+@b)*#UKS# 
zWkMbFeQ0@&VJOTLpb!@G5}2jryrI7c>9EoJCv8@*v7gcO8HgPCy?EfJFIih#CxP*i z3+U`9Xo@?VZWIC6odJfonG&ekT0X#7{f?MLqp=Tx5CSUo#>{0^)tnnE(J!8Gc)QL$ z*h>wjri)>tKND*9HqWEP7gc}zWZrAg3~F=kG~b9679Do4Z$0D$-Uq|I#$iMA5Q7Qq z;3N_uR=4GW7|y#1D)$+y)(@>oZEXw%7aScOjnAL|B8wVlo9zwk?X$Gu&?|qw2h2nQ zG2#O^rv|)GPSxEFdr=$UTe7!GLG0!pi=)`Ffq{n*&yQpl8!u?R7z5~Vmh@P*{x-Fyb4cOz+-Pb3NI-ul7x+7(p0I|tpU~`)P z85l@GIjbwcB_wNS^map4Qg=G{vYOhmC6FHNa5Amxs;a4AEzvs8$;l}P59X-RhECxE zFtS-Jv0rRpPfDHT;`+J+dw-|b-r&-us)mS&h*IP#z?BdBY=loxMvmCc__E$dpEI%d z5!BMPBWm?OplvLA{P^*7?xlAN#WU#iHtqiX1REUv|zXh>2u zHx&^T^@1KXRjmwVBG1EinPOmN^;~XJ;@ENoO#I3L^eZD+b4k{);)|a?ot4+Fd>sai z#j@Z4@b<{hA*>Xa5bkXw!#81WMDV35!~ zL;&P`ntl7u?g0JmjXrcB1;R5>9~=*}P`a#d!UI9vV!jATLtrfdK~ktPE`9swHAji4Mcj5h^;lRpS`|9XZ%~QQ4qkx#d}F zfLSdRchNsuRav>Pt7Gmj)i;{>^yvW%5$M4(*aM!f;Eo!=0K4XeKEi+*Xih!5QI@uY zjQe0IQ>YOjxMZj6&yKfe^zlnc6*d6K%^GNlw&`TU-hU2+;SVU9S8F2X7)?+jaWoMi z_c%u3>ea@}Fi$cXTmp>PRCl(ydV`o%+o4?X>BPw?@5_y*ni22i=Io8u`N_F^7S zxy0`L9Q1=QUjmR>41^`I^+1J$R~fP3cDfJORt{Ksr4*V~pEM}al7YpXfWE8H19~k3 zF!6YS>)g(POsxR5@sVuDJF-W{U`MoCc03$QCqj*)+xsDER4o%VM?vcru@6DpmBY~g zy`!g^xvpxgitQ+i3$tbA`SfFD1Bh>H68H>iM zV;3Gr-}M^YOAerbD@=y`egqtIee)>1>;RtqKusqCal%w+iHJv&gI-Y)XVYsFIO%f$ z717a=kvHtYrgk9)3MkR(rz7C^d+1=PffCBMOC9Y`gK~)9d+GnpS-cK(ZJFELcV-rF z^cepH&UOS`TKZc+rAeLO=Ju7J`^Ip)d*c8p+6p}~G(1uE@JEqiAnRHu?6;r-=@awT z=pCeb{{QSIUY=;5S%hmedM#qAyaNJQNqXo!^n{Z0wW1BhvXEo*Tc6QOM(N@VoWGH< z7f@aYo^j+S!?BB-$GN$OF6(t3aX4Q#H0esx=T6EN0jll;ge(V~=yjXk;IuRQhVQ13F-adn;|IW^Rgb0&Oa_`=FK#^l2^m)#5e1_KsALj&HC_I}51$IsWfigB@z?P&u4~iAh`pphk~_ zBlbF+c}l<`UJ+?HvvvRry8@3|fGN!AfaKqNFM<29Dzi=UgqlJLW>}YIEf#;7&*ic^(j73+V&D{0``%B*p zY{e>Q%V%^UzdqlXDf6*V+X7y99Z`TDkr}0ROdx6YB?F(72>VI48FJ!WqVqrdc5o8L z{(=hJMJ4p)J*ZFQj+8+Q$*BoPMn6Oy=XV~q16_2ftZ{}dk2ZpU?uhfM<{q+K`if5!fjx0{Dnd6 zisvex!^!oT)xOH?>MTWtJXusqkOwW!>lUONjrsK(-G1mWGMwmf*wg=V?m7;<1m=QU z@AyxI2#w!14dS!CW`5aYnUR$>dLPgnh7M99P@5zp2ag_w$GP1>{R>DV=&VnK z3<@=E;K4evrtNmnbG3Z~Z&@#lwGd_R_3SLeKzFdPvSKs{#eYR#pU+Kz|B8tGn|0j5TXOK6CJ#_6A8_Gtsezl3U1dH# 
z2H5J+Uzr2Edh2R1!Gl`Kp@8}gb%W3?P?OZbcCpn7nau&21&$_macSwwIQad7AbR%5 zLO~K_YGSeqcWA92n%9T$@SZh$2pY39R&4^jMO$dea{9=TBipd!Z?Z+jIAT!fJn1tF zQWEv)R(&$mblat0>RE6Ln9SQynnh3W7M%cYd#uAec55EWqcCus$uG2O8?hZda%3$6 z`W;3MmNtF*p}K!-$J^6=cDCt2oRIYdGqB=NXm_OJOz+kk(52Em`tMU})klg=JcN+8 z>&rH8*wefY`@Y8&9z-R-IgwE&B>|P-njK^&0zg+1*yyJ`AwN<8zOimVIomdz6ALiR zN)ZS250HPKsCFNyw07Y=$t2QJ2-6Q6(hkyW!VFkv`{21E+dLAl6Oxi{1Jl95P2sZ5`Qgo?ycT`Xe78m6 zBRrFCfAHmL**6h4X;D#7<>yy`{USnzD-3F%_UiijwkCAyV~K7_bwf+Fs!(idBNn@D z|J+kH>AZ3DAzjR%l$S$JEX!JQ11`|95&+$Z70&EAgnfoOI2lqQa&N0f2i{l5h~CyV3z_wQ@hLB)qI%a;j^z_mW;9N4?0q`s_JpcoGT zcnw1Grq8R;dXDmL^LKH7DC6D-1*vD<(L9RO`ZT0LBfhl_)0uqGI~iy~KFUe$%p4Ap zxj$rmSfRJRRzUyL$8A_r7BH}SX+Tk`Bn$HA45Q$U#lF4NOiw|0+ZIi94m>NW0at&4 zD0eECspiwYfe*!?=)6F!)L>_Gc6Hw25Cj-J=?0EiO0>Ov1$uM?`dp+$5Kj{Cp^|M> zKH9l<@@Ev=XQ6%sbWg+CElohtwzK=xD~M`H>7wSA&>6_e!%b=52BIMl^=V7jc)=7$ z-z9Z*VG}Fsy{K0N*U*j-^Fd7g5HGbk5F~F&t^j>jdf5L(-Frqg)qdZiJ5-Tg6i|u= zkSYi$y%UOvpjZF_L8M6Uoj?$UAWam6NbfI7Q9ycAkzPWVj?~ZvLML#ZyubfBRqgXW#D2D3CYUZw z_s{D;U{5#^xOz3SAFL^*_5Ac8A-XS%?-b@fs(`>yx#9+W9H)IDVGz$rpn{JZLV*DA#KDOG zAl?n49Qi$Q!B`DA!gU05^75vlh~og$5fqMEr~MT>Vq6jb>$a2%vBVCvCv%E=I>uPD#n zH323I<`B@d$x4+!0^#9%C<9FJ%0RmYpf}J0L*=v8g^_&JotO-EGDU+IPd$tQ6x9HB zCh+6PX@D);3Lw(_y#a)(oV@@28`Y+tmMB?f>=GizX1R~LlQk*j>O};yS%*I0y+TU)pX^A4VdlR zz~}DBR)zaOD7Ox%94S>+2Mas^`HhSwR#u#V_(l!E%L78AX(GsS3=qnTEMPOJkS!$A zSXX6bt0zFprzyX$l1GB2A_<>FY+RE>|V(rH}P?D6>$AZP}4^|&~{m@+6BVKjDn-n{}K7nqTCbscfj zB9gf@7Ny_T!P-1e)U|bktGjm2dxmenbcKR@{6zw$Yu--^6@Udd`~*DhEfDvG9BzY6 z;Bh@ra%~aif~arnMjp3|Km4bpB(Kj>D*%XcR(rwTX9|We8$?jMY2Ti#)b)gI5e3^! 
zad@ys4c%HiT=PKKu;B()?E#pDg}tsQgN}V5n=wy*08jl7{%~YF(sG~ZcvlqNoIDPqa=2mhyL4h+ z)4?rjV`VkLdEvsNEU*SyAqyfZ2X(dEsXN?CyhfxZbRc0TD%7pSc{(CVZrl(k3k(#^ ze=1;lIx!ee0P@>NsgNHC>5Yz{@t~s#!s%;v&9@*M}V$&6KKn)F5 z2V&NkfV^+2FPrcmMC;_wo5aMCT>$ss*8s4_zd?6R8q74mXI)`m!FE<34J*8Hy%&7f zbkK3pQ=)n`K0aOoux1Cp-P6Hw#wh`q2(}$-1xa<|fB6r67NLhYH2^!kPO%qNT)!#q zASzfwTtdHqs*!EsA4$myXNMSD8v$YBzQ0Z7;<8yzJo-BeQu9-<*>v+Q3=#ca?SQi8 zEBMf~eN<5^j~+|80PacNeK&yUBk9_5PP;?g%o6Demel> zvdm+JhmTLg-F{90B!PqzkeM6z96HdqxX8Bz%eT1HzGMmhFMAB)w)nO5*rK;HaX1@8sz=)6&M+Cg& zzy4p~0oQK*0R$YuU^6(L!gLWs9?cnwQF#P#{y%0FT3CYHyS=@YR7PGRl z>Y(08VKL&HjhrO_(|Ym)qRQ>@G;tFKuTrAp0rBcb9k8O@ z9T4Pq7*G;e(kMeH#mz;g`XWsR3j}*hc4eym&-|175MT5(GnC?E!b>z&Hx^vO(E(`e zmoc(7a?}Ifv00bd7$q6474&|3wuCl=9(4x$C&r|gg`ea%DWK1|qZ$=c1=HKaN`lp) z`l$3r8PoFD5}^|+3wG3NETv3Zo**h8hJemh8u`HFP6g)TR>rXIywDR8994!A>dnQ~ zIT(V}h=$gtr?gq1zlc>q=w3A2dg<2xp;b;+A8-bvyKw zMDNwoHTS2`8`W0xbCs>>B%A^7R@FO)k&x0$z1dPGk_^li8|};;Xj5I8U;hs&;fOUP zh|o<>;jGg75xO$E5NBP&h-!!Itq1aCP{S>Aioi=f>HL++(eI-fvMBmY`leC?5|zty zTS~yGz@|x(&V7s|2|_~e6Z#=JwpjY8J@cQTy~9Lj!hdE2sWL91Y9X@OVFU{lnE5_u zTeCK+n5Ks!H}idj1eWq_c$S{RRT>o#H9dJ3q9Of4HOk-rfdrx<`9iK@#^0P^@)XM_ zxUZ6Uk!)tq^auSB3fMq>!fuND?1bob1ZnO?l;<))Jg~y>Akhtjb8?yHF>B0rcYeX5 zD8Eot#zM6Q&!BYmSiEhbwuul7dE*Py5ps!GlEQd_qXBDfm1}Xl)JP8uJ%6~a7P$Hd zkfmbd+cZQPu(!PqO&#Z6kjy-QHk(c*e>{Q;Uvd?BCqg_imR|_p@S%FM8}0_W6)iXD zn^bJQ#&b7Kdq+;6n~jdYk&7%sLP%8OJqashN%o!&S%t; z9t*>eAHF~GS0AC1*D=PqGa9l(8Lvr?KcBP6B0csv7kfKTBPRk~$Q0K}G~f@{m^X`* zg2PMRXcwYQS>q((5};VuxSX0Lj$30s18oY7R^g@Q2w*at>`;lAXJ?dksWB@@KV)G{ z=$^BUTt{jEOI8l)+2{*#hVEa_C=pf##IwbrWsgTA=Zi+IXp6^Prnn1m%k@r67)q>A zMC|+wH^k`_9f{sf?f(*t7%{kk7bp=FU`Y073!=VzSJDuikA4#2lrP}3PD zbwLzoX5a1nHfJUGLUrVXXJf-gXA$VAFVD_yK_@UIi_~x52IWh=f6ltbUSIBH zCBab;RBw%B)%IX_4TkrUEt>baIQFeI=JgaaZC$lra$P9D)?(Dy5pqnRZqhAlOrB>5 z{d}O3p5~p?Pq!f^AB4SC6Z5d;JHeWdspjS{_NOl$l*&jmm!5gEgse^}>wn+cL^2z4 zbm2pG=uVz`88j}Yg{KL&v77qc!0=S+N83ha1yRjV=MLVn`7C8*Cw;DoCU-EoW%e7~ zoz9;NoM{cfTmK=6vvN_H9^yQV?HW8G!JyUsT#+>D9&Xvl0v`a3orQa!k>C~L7jHY? 
zg$PhxQT!3ssO~;O4OYhkPZ!P=wJtl$&zOE?mSCV2ZM~{7FA5Pfa*_;IB=CdMZaM`% zQG{!xzeV7AJZB8hd#H|f#`5CvM{;Ds;dAV5*}l}7^Z4(FHjxW=p@&k#@&jB@5!5W_ zrz|T-*E%AbY;)|o=WtGjpM5)2kEuj1)5W7#_UmRpw!LQIN=&xw+%EmAQ)$<|lkYu& z1P8U=4tEp1nJZ7*#>vBFLH~rX=jrc{_Crrkygod}rq9VPxQqUw;u+{wJ!?I>67Ef# zsV`|9!&`JC$otzf^Ss)X-x7xN5Y!pntZ=8L&D8^$fd?D-;ZMRtHu_- zoZXzuU*IUA!g^?YRC;ydw#uqj`qg|uL-RR|<5dH6MR+MQpRk;$u8TOl&4M=;?2Jpb zg&ZkeJKB#MNi6tH-@Dqh8uPIi3+=iN2c;;QJ%vy$dQ)1${QLAWE>?r1o9Q=Xf`#O5 z8%~F2PL6_XTg)kYh(z!jA;w_7oK0tfxq3FNZ_)ALN|xhq&E(b1ar3(lkqi4UnUeU@I@R{4 zz)cuKohsAI64wC-IwUx(4pNkJ(aAW2TpQ5}aQH7+9c;#B?n=g06#ujFUt-D8d&R}C zdEGA#7=LiK>3o^VytY|bzW@64jF}cm`BesIP;@WSTM0{9pp~&X_>DqEQJ7#XyyFWU zp^n6$rKt4jCciQc-|ueU`ahW7-(MkJN>%mROEq%%emMQrD`g@WN|6qTESpx|m#+?P zsxCPr)mt0S(NkIX~apQx% zJH-@K68&>jV>gPWxGGY-57M`Gde<%i9~4hn?Lq_l$X_e>aG|eE*lz!g7MZ=};=@tH zb*uzwWj4kqs`HPAc8xiOr=f!cJ=wK{A*4n#ebY^x+)j3zF%AAWvw_wrVmnkAdnVSE z&$~*2wBhf_DpD~I_~;?`p1c?>4$>$>blP$I#OTe zqqQMz+|lIpOv-(Zfc)=eugl(fdh~p%Bun9L6n6gmXH7OV_3+fqW*X(N_`rKnY5Vw? zzp*u4*trA~rZ;l}i@L9a$&()yfJ5v$pI+FBcYPO0R6`l#m80U%;F&v_jAODT~QS`L(YrR)6B*kjVS0(;V*HsPd zQ>>4b{P`IwT(6{-`yMKysk-vOYcXZGC&($q@5;<4q|DAO_r1V{L&C0)#z~JthTAWU zur2r7(Jp4qyt}Nw2$o+1zikbi>=neSPaAl_P@PzF%lp{uPiIfO3*&1Fvm zxmALpSe5j}?wIAfZ$GNn24Q)or=J-6Fh15yk1jYov9KN-`+>kc=yysmh)gg@j-qC# zRF74dmI$de5CVR=SnZt8YQ6AnAy4GN2)5nb3rEdpJGVFP_p!s)^p-VaoUmh9gGw$d z{=1K&k=WJ?O0&n`E**{03_KUZ!<-&ur!H@JP>WQ=zH}O~BvVU7H>(7P@j)`6IfBj4 zz3PQ7gmmNn=GyO6q#LJez-V^%!qUj}Xi0S0{+~Ddv6B07)8=QUTd(4l>#^TzKIm!{ zNo&1LFj#^-8R87E`I`EWc1RlhMI7z%*t3(a<-sQMIa@c}@?U3qy;fjV$j>8s=NM8D zgjREeJCFAYWx21!#I|>iTQ8I;$n5O)Zf~ua-NeS&j^euSOloGo z9dpPr0GD8@IW0{v=(stiaN%gS;2V5f9RI>k9k(4#dQd_`VMB<2)!^%aG7##k@_1gl z6*v{jMe9J5D&tcAyH3e7>kD^JV@GORLJxF6b`&*8Z(M%Jel$kr0@ao-ulmd_F<0$| z)=|5kDm=nce*M6cRsW}|w$s>yT-Nk&N_ddjRjMA@-`XA3x6@2tpH9TO&MKA4$uA3> zKay`|ADELp-c_cU;l*n-_LTNTAh}VZy&}uaU~v=5E+0+*#yq$ucT8}RtmGdE+rYMa z1kWwm;o_3TFRqi-*i@0=(VfmvYJ$Os_AMnGQxp}}hfFtIss30A6&r6bIxHoW>x^!L zJuqr_B4+#5;EKDcumm+04|}SEiqNtC37sRZka>(~BFp?!>Cf-xis% 
zX~{;bDD;HxNF|UDuTdMGVM|~;N2o4^)m%J&G@rs@8q%=%oB^+Wa2sd0lNM5(>iyx4 zYoJ7G%=l81&%bYxq?P8cR%ps*>a32KQi6Y890=k_)y;3J=HtCq<{H5DM<5*L=x#8L zQsR-MG}d>0YZ&mB(4_h_S1tVl$4t&?Wm9NRP5?rpC|)N-?^5^@cH<7NmTz%zn!D{7 zxs(t4EeNNLT3!-4?%mR{%6)hV)z<=wFdmNs5Addn>f*cW0oGqvWbijljqT_%6r$v{ znJ%bcu#~%Es`@_ra-KhYFCD>KNNt=b)q1D;m}|Nm#ZdWf$Fv^{m#L9j{chOw#_sNF4Zr*1sOjzroVOZk$fFl(-d0s>rq!wJNCOp_cxk;U!m{hi0+tPb%bCH_tme{hatQJ~%a0 zN3Ga_feap+_ZgRd!Zhn5y{lqeeQUlvhLOu_BFG|B_IChAqmy z+3RuQ%*l9Fvgxa|@Ae-!WITW0 zOIW`O>U&>Lrzp2xI9}lYG7D>d)FXF66S0v=inP-HFKgLGs zOp=wwYSWuP5^UWhVG6G~HrgDs5m4%%G1A$axX4=i`9UlNYi5bHhcyOs_dgkS0XF}s z*!R8rExNeAW*Hfmk8&otDy`|s67Okn2eChqlqAd81x3jntd;_**mZMx-3o`0kd}1I zQIEAZN$nZZC{{Cdd(){-r*tsh_H#CSm4@tIS+kWI zqg3Si$n(xUoQPaM{33vx-)6uMnvA#wSf)~?R;9sQ87d~$+MLo+QU&_|?r%mv+~3RLp7i zdEKQYk|~g1r_N^55QFpQ@i1c-oWm8zg!78>PoConTqYMJ@c*1fwoPLGI$&*kO`1YW zR%gl4R^7L71t%PzK*>>_?A2u3@f+OYz>ECwt{i^3-ve95e8{ex>X*%`r4!0k6s>jo zCRh?*cX~aelm3(vNvY?7!43o!y=uUA4|z>hRMDjlroQG%+IbyBkhBOEj1dO##8oHi z)Fi3qb;-#bRsOU)nX8kE1vKtds0A!D5B_Dt+p%r6rOQ~J>ibyM=K(*y=d{yf%UBi9 zEAjv*1*P#0FI>U#MOAZwWh%<&qeQ%`dJ#ugUstD_t@S^!ro5StcEc%rjlW7VY#-jZ zi}Ytj)n+!nFHws%t(~QNAM-BaWEZen{6n8JmMmh+jDopMgEOt1bJY2d?!ERtj;r|Du zw`q~)U4{Xir!`3$tKm}-lM0a?J%1%2@`^wom>37%Y78uv4C-wHhTH!$8u-7!gZ~FE zypIKT3rmb~Dn6|iqM)GVUx-o{^mZ`4g9wCql3_Vi6rlv@fTZfYg)-^|)R|xqTywom zJ_&t5>Pw@-j2mfG@YHi7Z&HG4A(2Nqg!N*!O)IzonTjEFy{Pci08s%M<;2Tv^fx&Y z+Y$oh{*R6o47`+G?({)%ROeIOyiLdU4Z5a@f(O!P`o0fM{hsZmptm`BuAZm3$A;p@ zk9O#S`h#Ma_CuZAYOx>V?Bb1dGklPB*^9^QD*RBkh7~0dVa_d%z*<)|ROPQLCKogWivLsSQL2BSomY=EYQ@Cuea4oV; zA8G;i^;H6hS|ZXeWD&)ibrV(C!*ob0a6a`RFH9yozUEKX-%?(nkM4rqQ)ErnfQ;a9 zY8VZ0!SvB|tbWFu1CYfR4kQo@>AlAYCMae1Rp=!~JJJ~k$Y;s2>;v@VO&cpTbq|2W zIgX?g8C<&JFswf%ssKBZsl*n!Xhp9>9E$hpm zNr*2+(?eYEMH63A98Jc1bRFdkQvV>>f|iIcpFh{vp&C0eX5h>RsX-Gw@QR=id0>xB zH{&xe8Yi_cgIb3hjJn-lh(Y^CYO|q0yN|dAYi3;Y1{V*^6Eu0CY7%QRgF9`ILyebt zK09B9E5Qw@j$~O%pD~N!`u6QxU~f&>&RkkalIX{o?L3}BQ)GW5?{m!>Xu=iAk5sDu 
z;WRsd*k~;<=-=CCEero{!w`Iy+|1B%eK1h3E<oUrs1<&9<-p`7x|J%mE>r)?>^-8Yh}N{tkNV6k$5=o&BT4D)r;Li__-!5&McUqh9U-6B@H6B-2O57u zo3dXzKR{D%7Qv$uI-z0mUpix4buBmU5vi^XS51<|e|R&>BlW)pQLK6u`SrnSf(9Px zz*i%S+&>q7*1QnY_Z9MKjXr1@?^$mhFTZ)S?yhwsZ#kxq>vqWf++jPPw#=y6^2 zeS__9%r#v6@SzlJ$t1@+-N-glcVCpIFD ziO_z7PGN{M)~v@Axhe9%m%1HZA#&cL>#o9sBibG@MzF>W!M7Vlj;iz#7cUV2LOKVx z+__(a27K5fxcjIt*MAUHGXwxjc_Z4+JLDTgFM#~>sV;&E^lLJC4s4BnYNI)fhp#}U z1{+u5-!g6aT*G{#M_LM^wqN$st@ZbvrH%Trsi^bsuA@s|bFfne?Zr!3|6=uTVKVe( z^0#5p+6fcobgNMM(%7OT{~DqJl2Px@9m)vwAoQ0KUr_cPYZ!BSPSLCL%(`q@1$V^f zfV@cZg-XQ-dY`#!H5JY^b@uZEi9b(VFn1H>MML^vatk;r7yE#p_7&HygS!a0$k7Wb z3rak?ClVS1>2%IwdGK@4Zu5lV3Zp+u4k?P>!aH6N{s&t!qK1=Y1v&$VzFuZ5_lpvf z4Rt{;_NG=y$y1$^<-$Gq_FHt|a}F8v3vt03Rv7eP*$e4b8H z$=;3zZg698iE*^-Bhcq-UZ0VDu|@9AdrMc-h*!6tu}16ByIM~F!KN?wTs+?zD?(9= zJNtD=`M?aRDljBd`|H`TK$G(AXyd}LZ)k6={)HQSc6c*?+vsd+SfBR?bm{t9Ry zVlQGMR?}0|h-Q&wYDh-pz)@6KDx3HOrDQekXvO^akJcjN0{(CExS?{2FTCMZ{_1D^ zHGXftYxk#<{0%i&;VfG)fl?^2%yNEGd~-WcAtm;;Ewbl zMP!~6)Qp*BjI)B0HF#A76X5=qJNgh!w~bDIZ>B>=jcTJ3qDKlkIve4K=v27hHqcol zqszY~J&+=AQolH!Z{#ficOhk?M1HBIP(k9Jd?&UkJED>Sma_C_Z(JFg!m%acVkpW? z`X857^*855dp?q9$)JK$2P>C5tCGu(q&-Rhmc8Hq6E!k@v$Iu}A7A8?(MT#U_c>Ty z8VX1qs@~!fT6tAmpJ7{Fo$^3EfZg`bs{H3*+{xyBNHTRWnD)2wPR#X_OiF=9pIIfx zMYPq}%_Z}!U)yB3_OqhyyGMMeZ#o5hmV`@h*N(^>FSBz&$owZ}ESoi#ON;tm#o?a{JBd zA1c7h&wL)fEs5g6N_*bW&y!3r7%yGbMW|)cpKCPGiN0~s<)-HJ)PI^3#ZXU+BXSt= ziG5F?SmUlgHIET_(2J!HGp>>t*Wq)e+4iV1CW_-cSZB1qtn}CPty3>>waonN_|Jjl zr4K1bN9w_iat>tGH&T*x)w!eOlO@fA4S#JiwwLovS$Sl>pTK}l!iYEMv=$sWBnhzL z2smP17IzzO-jmmR9b5U5^E!N2gI61|qL%0c=s+`XScJA8^vy#2yrb?OfgLpk-%di4 zUCy32SLOe0j=AMxbArDZ*9~|7`A@*9@@4(#o20H(DxfVCW1J_WEI&NGv&fM23^o(Q z=+`V`y+z@p4a2_-y6#cRsPgmEUaI*37wRWEtzle_qJD1CwhN-K+exGs?UDhJ9N_Mqb;`rVjV}vxJd(e?d zE&EG@70_daKT@VPvh8uU!Zl+=5aW^=%{+Z;BHP58Nhuqx8-rmyG{N6fpVWoPz0o6D z=JsoKa}D`-*JoO3JK&`H-ICQ<)n)(>1Tg0FiXF8aa_wAJQ^NwtBpM4q40uW*^VIdH z-DzspX3u-Nr$RH&E^nZo*ChSb7hVlh(eEr{lM9I<#j-W2>lEn^YjBr)wzrdsD`Z7^u|{WbG?5! 
z_N@JjpSn){f+Il4!T@=})+&uIT!Gta@Tw#9A{xH@E*)N1np;xZd970*35g~ZRko_& z&gMund)Xd^pGJKoH%sehJ+k~T-1aMBNt0=h0retxP+7`ZlH};gJ5neOrDP;Zf|QKB zRIwq?nt~BHpOW|wX-qPbR~{hX96hsL0MW~CDfT6#*3&nfZj5UM#VQ+an=U?!S`(#Y zpf@}7c7OPA`}%a_3eTMl+^p z=zTq{g#doiGbsGD)przE8Es%++1KNlA#N_Xo40jL!6-|edZ6@rtQRYhy`g|6=Fr z0un}iz)glG-!kifs8;iq>F!MW7bX$f$mcgNV|PbJuJ>B8i(YkMW#m&yw6G#U;p7kR zk~4gc_a7Y8#s}7fYVaBXt4{Ssz6|=LIY%lr#0JF;il%&E6x;owgxr;_NdOmFeA{Da zJvp_SJ3xm_G1H~#R!QTyOW*qU?c>$->ABtuT7$a!uk^H6laP$gOmWmu17n;E=+7CU zEsc9neHVIVl){OZ>|idwl-)zfu>Jc_N&J{~`Jov?NI?=9tNM37 zn=C|sb9l_bH#zI&bu-^=;>98pYP`dPqYYEiD0xg_x^o0z#z4YkW$X!8aIs95O${B2 z^s#2V_a)^`$>U{Gi_4yU4a+c`*ih%eX2%~ntQJRQyq!R9j4FSDZ0dGBksV10RL0x2 zr!G=QlLf_UG|HMiOBqPiemtHW11}vvrG}=3ax2i<40sr^7tXERK_Oi8*|SI|-QHai zw5c{iQ#JjTKe+etl&!}iAq}|UI^JctCvEY!?SfhC?bv#e%KEsU?k4)j=8!AxQ6c8K zt_SVrtA@6*lRi$ps^5BoUZg#j>#LnKFH(-vF2y!zj{AjF7a9B)kSe!yjjNH~wm}oz zi=&1DksC17Et*0UYL`((&Ntto_)R&}-0CNdZnhZEUD%t() zvNPY~rmSKke7v96^&>M9!qwkXS+RRqDz}OO$;)1ft+9`bBHjOX2Q0iZJ+H0G)L zq&?VRW)f~tO(6_4HPb1G{y?7{8ywwu*IcSB^A6X=$hJs$zX9Ti9clKuxW1>y8<(xw z8My_f|M}>PapLfP9Yl>vujCV|CYA^4xerA=Xjds@h$APdCkb>GG1Aoza1$uwilxGkC}bNnhcPFgjzfh z$voKn@!aqugtYjZU{r1QPW-tx|6H#*XlrxMP-ne#&($D`)x-In1IBfu+~Qi9yR)S< zxnkR#V~;Ok!0ni&gw8tZcko=ZnQ3T)RL5w_)=|EV8#4SS8|8atIX70@nP%DH=s;&o z*-^OC@A3KOQxCr0TLYkx93tleUY9jpMc?X_-m~Bb*<9O=O5`aId^dA5>);wbuy+f4 zdjaIH-#IE8ufiiU_<6aXRVX|_aD}A>ZMXs2=k9=?*FBx03Y`57{EJ-==xGR-@eyOw zxyYpja>aMfvQH*BkFv5(TY3kux3Y}+xVT}z^WvYp_IU(7~+8-}Pu$ z0>{NA+w_ZkFVS^$NWyobiEG<+%3`94>+3RQm9unNlGM7J;itjhxNTVff*y73lyBw` z^Gv6;cAYOEbCWJ{0w>-xZaT8f5NrE6OtqJH-T*X}gG)gE9o;V&F;s+!Kwv3M2tE_T z$!^9*wrLaFMo8cteAsJFcu0;u#@*LLZ+$|W{wlJq++KMnF0tu2twyh6ly>O(>Oza- z!=4Z46o0%}cYIUdQ;RD*rN(XEvF$S5GNWUrlIh^X83%Ba2w#G=B=ze~WpFBxsSQ2oF|jM0z5pQR>K9CYgGlu0rs&Bznaa(=RL|EvnE4N|D? 
zXyvnPd6zO!l7|^S_4|bx2=(Lb+fk72wXZkOLFBOE+h$3zRN2xfW-Q;sO_O6dkdb8O zfBAe4IU@twZFSNk8T}}Zpav#9%g5&#cm8=IkSlb_Nl2}?d@Jl1dF%CzrHo)x!EbGj ze%#N47g?3Y5r~cOwA>s%APw`vhG+EZAwP9(JQae+s_9>TLNE7Sj+$V+TW!#>9ra$F zE&JfnpQN%lX^6vph!umbqp_$7t-T9#ZHyj>S;V+qSGhpx`S@tkbn!2f7oT|C5}73ndA^s*jvY2jk`hvU4fUm9kbWg$x&oflxdB?R1g<` zZ23qiLK(&1stJ<|F?aL8wMEi%4|Oz%-qB2Pv`~ydZd#=ganwLsJkcss^>rysZc!qW zS=QD((CaAO8?w`Z@-sW|eB6x+6C^CuV- z3v5;0#&-`Fni<9=TOCp0SJOAJGo{bEJS*t^rNp%EK~IEogFNm9k1`>73L7(e>15O> zP?|agsj?NGS8XS41|HPWS>zMddRKZZ165Bc6oaKeva3_jiM4jvR3fafH#?KVA1>4T=9LjnNP%zs+`c4S$t_so>7_4*WbjMrVC7*1#?^2ShptQ;tdiL z46+jv#^ssgkcei0$CUPsclIWS0qb2R=(pI678sJY_%F+};0Y^=VYx)*txxEzqqE|GHSV zr{fxqUGPKIu|-#kn}s{9iJbAP6g5^H%2)Wit55Pb3*7|)YjIt3<8IIFjC|%(nx@_j zB{ov$H1^f&@N|{dGnUc@2ajub@(D>;;cj^^{EiQv98I2zmT?y(`CJEtCIODgh$));RLAwn*B<%*1iJ$vO|W!;nn!kF_NQ+A~#-g z(brpG?#I1OiFSiU$-llr&z+AyVmHC5FL{kmWxCI=GBS^=JlW07;Q{1u{jnFxUy~rA zzJ)eCXV-;+sreS*UBWWs=eF>AS(Y(wl|B0rWi(}Vl7WyJ!m_CF=(;j*|6!_|_=@vf z+$OLf2fa9iKUXZ}BQ9C|SL4jdf3?|+jH5)i@c&KO?En6MX>Xl`z1=};GyVs??x6l# zSG5fWsblqDK)ssfg%bvyQ=`CiJimXiBcE7*s#72wvoY|BXhi{DM2Rf%-~Tw+{(GU@ z^ZUc!obtf*gvDoLn*Cq3>O&S4GL#%71p7<=_B?2<&*yYAM83T3W}Cnue{x-Y)l}&wqFWJ$Yh$N`|@%?bY7=s9+&s_Lqgibr?9z zfR-7{hLvm6?5BLOU|2NamLjR~UX=e5(ORpGOP4L8EpAGODh`z$2LuDy~s6 zH+V=p;I-6uTsfQ0i3$W5@itvKIM+7&@)^}Jo$6I;Hf=R$-=)~fv2nqgV{gNef{o-8 zrQQL3b)q}IuvhJroL5i4O2hj$*KO~a$R_@f1(Wz>(i4sY^O>#lV~?60Li(Bs+jR^l3S_Jr*(@AcI#Z3~XRhQ~`^$q@|F`ENA6EMzIhl}k0M4-v;pkFj~) zg>)(B0}H#iSD|S#I*L=~s+)Jq#-zBCXx0X84tFD{LqkJKAVWTT!~74Fk4Ycr?Yd5~ zh8U}qid^LM=lQE{u`1QsNOd6k^$ZAz^#+TJ?Yhd*ZKcnUv%_AsA)fW(2$wz$d{EMG zN0TPPLi%Ai_QmYuQ?jh3O71AWmv4q%aB8cs?R%kU?U0n2=QkneY=TP7l+27N{O$Uq ziLbP-!t0d%yTc?wq$%2SX$L|@dK}8n+5OFn0ghJKiEVviUkge11zj<7QMb6{raGL+ zq6_-55uWPfKh=Nj^jCsRwaE(Vzg*lhlcS~4WQKW_c>E|yRg`l&W6L5$yKg`Kv)*Kl zxHl0Uf#|gwf-Mg>@ilXVI0r`H1lP%VTG&cdmyTWd`~y_0sl^{Y*G=5zAU!eM?-opd zX~!6sJFcB&_;;6YsPek2+HEf{%eytyi&41D)ki9FX}G&0_GEpXznxV23lc(h1UA*I z^k*{Ot=PBPBaS=AT#lKUMLt(>O>)*|qiY{zhY1;#(nh%LSnoy!GyndYQ=^k@@dRPD 
zyk|X=$~STdS~9;|b+jgj3#@!#{CVw@(tbUi(YIIrnhdNOkM`EmRPcF;%K};X<>#lf zkGXd1m?P%?5t>6?xbu%K%TUEQTAtC`97$v$(?L2)ASA7Rqwkv+OT3q zIV1zxyPR*8RkXbGsxtSMEITemy1&NMuh3r|4{{ETeM0X0vg#Kc-kFvV@{?%@8JOjN zSx_?RvIgGW@RvX(p_94t==0x;{rDd-u!gvdsk$PRKJPz1eEkHog#=_>v5Jq6w%iuu zN82@Ai|>pw)1Zmb%IBTuA^SId6|{1?s_To_#1Tii<7m&j%n^7M!=zE?-Gq;$j7`f; zznZOHa+8FUD!<6TZ?pGg*8X_D=G&O9l%2zMjZQwV&1vX>M|=Pu*_4m64Y||3_TK4c zW)Z>Ch8bl}$_kz7;k#1aAB(16YYFA-pYd1(Tb|6P#&0|Z&F7nLHxZhr2k$m-sx}@o zh^sDNwcI;--n4*d7<_7TcP!GAnPOU#Ot_#(?eP8P7^d#gqHO|_CMx&i)Up_F;qhEf zqrh+FXOro9AE)}<;?G>nTFiiocjAnBIlJoGa%^3#G9tzF^jB8}|LN6OkNqR7jdV(9 z&;8Jqj;D9#d8fk9rHGfgHD6fAYy|l}#)lOe9?3|`cTaAzXW1PyP~wceoXpNmyc2t2 z!_Ra`+xvMukj?GoSo>spN5@^#4Klr3Wp^g(?Xvdi_Bvylcs5R>3%e9%G$k+3ZR~VA zUYO0Az2_5L(cYR$dm!iM@|2P`q-A*-6{VO>(94S6<%;={R z{BgViZEE1@W3Dn!Wp=O9&vrJ}N@s)c0L|TIHvZzLYSY4oQstw}LR;T}nLwZ5%#GZA zlHkwe{l+G&%w-EBe2-aHA7&FIKDIF%OU53=d}FWXCzYHa*j`^jUOX^++^DTQ9(Uk& za%pS-gwnEZa}!-FOps}v`;54PRA6LF(VthVe^X^r!T0sNsaO<+)ZG(y>)2n;yhE>Q zC0a7*tjU%o(sRiSzep*PdkAR~xCRF1@S%zSl+)KBsbQ6b6^o_VR|^E?4kN8eNT0E?pge zvAq;Avs*Z^w_sP-?CdnY=rm|Hoo92rU^C-nvnMWTlAui``EzpV<7hy{S4YZ&g{@ki}!ShjP`)R&fyGcrHs;tRV^c!f(a4rPsna*saG2_VAiT1@rkRml5&gQ*ZY**N4dLek-0L^ z{`;!qO!h|51G7H2CN-a6adWN5LI1oraJq3~{^Wv0SzNR0+W?PrdRO`{p_n4v#?DHG zi^)if-pvWIjUWf9u6cvmR}##>ySqN{whnl*~rA8m_3G&=g)K401Ky2doBVNOlXiQ0UZQ_d zyaF@%+$P#abfo1$?B*9U_@gHYEzeb!2J4?T-?U8owToZ~xFF6uC*4d!Bh3KY^R$fe zuC@0|=5{JOac-iFxNuItM`?el`4|2gLf;?rNhf_c_NPluz#|T2!PwM4Q>TLlo9Wi} zXNJQJlH(~j>$KDCo?1abCte&ZC6`^!FyIP5NLdvLgabHKiF5IJS))7U#n+2zPHdvPDO z(SWWZKlPby4ia!p!#JgU&U)-J)PAZVH`M*wU2?$vEZBT6CB4d>`|AxgA$ewY?_#)r zihb;I^Yq_iozps@Ya{h)G`Y8}Es}=Ke%a1%&ct6|m~RnM%6>DaysM;h=#p}Dz`fC` zuk|Tv#esQddoK2>L05Fn_LZS)FI1PXa^(oX0w?Z5$X5 z8O1#I!xVpS1krshUpGNTPY{Z}C88r9tlNJxzjR3DT3kCFzy97SWMG28=W}*loNOVq zdVkR7eSXaaVS}*Io2RvfoQ)d6GCP|A)f+q8_+P^SBXq!tEw+oW!QbJe05* z>w2o*<=fajd6~RcALxo6NS|W6M8>7UzZYH{Ky$}Mh{y)H!?&^nmKnOMr32ESiOHO< zCG?hObeQi65NJV?r4_}x6Ah^=8?m%~6s7w#-^@YNOTL!q;V}0>=j_4W4zoDJ>8GPt 
z0vcH3PQB>G8GU%~?6nPwKD4#!rMBb1fB5>tl>bzXtfiLWCn8KjSa{lzF~aX|1?#QY zB#y(pcdJDs?i;tMQV#a@#Erl7)g_l+zb4vmV3nXfdCJ~gEwzB?j&JStv^>k_@JW$- z&`UDAq5mePH{NdR3c*GFQF`BpPyTs6e0rX=0p^ngH+U=_X5$S)lovMKeA^ujtTMkQ z6@}%Kle~g6Fq|$nD#qR7`r0V57AE!=2kmVj@r!?%& zQ!=amMZoX03Nu%F9lN{b8s~< zNhag!yC%b3^}1K>@8`ydojdA5t61PC)CF1-JYMmVM?7Jpoj}YFiFW>K{-vHUoSBlF zkbcF9;+@SfpD0P~pXM9+gO#||BB}|^>dvg-Wc!zPXd1=JdmA^t@DwN|C9CYujj%FP zYviWfOsSN0Q+=?QiXlV{=5e-%wOs3ymOb*HVaW24+jTFfFF)Tm=cOAveidF{hGL_8 zJp6mE_V&PlVthVM>0D6FE4erRFE?IV@b9T6gxo}w95pn*&5KsriN>;e&pDVn&vv(Ky+V~-QI*Umb;4w5f4HEmPNd}W$z3W=GGqONJ`nwgf0wLY>!EhMs%7MFZY z1!WSO8KgC4QkxGd{h#s-)g8SA#J6V3VYd^_OESr_xyKf&OPUz^L^*c zy}!Bl9FngCjkfJ~WYR^WCUta(iL^0Ne-JUpZ_T+letH%rYVpY}X3hDWzZ}(x2v&Wz ze^&MDzII+)YH}Cm(5&SmOHs|c0U~_!PMkjRr>zcvofS}>}b}W>3-THxh8wKjqmq%*#SGIuZB_`u8HkStvn%! zzi-#H)<&v9s@^FYbeW-4BS-1uuwk81q9gh9hxf!H3I@?uKuAMOEf@^3(TvciTLKq= zHc&HC-AEAt8^&X%I#_Hn~8^g*S97oTxezhwnH1Sy?@W z&~n)8epz9e_BlSVY~pLC#-RVRELLeTTVjGAjkMh{_L!J~*QDX!i3wbGA_2e5`}#}= zKD(an?!^lgD^_8a#Xb>C6tU~l@z>arUOc~5tVHv=-Zv#`*Ig&448W|Po9J`9GKDqM zc6h!a2|?0VWu$ni+#ypu|ClgfNBpz_goB)*r73QZPs(yAZ}(8e?NfAbTRoYn!mvCI zzd|-2j|I>r2;Pky^oVvbLEn+4E)gd8SIR!M&gL5`)j|ABDg=kNku?;4rj5!8Z1U`I zGwS>FJjzybi^~(i>vA^L>b7}*?*KSbA6=-51H1ENS+{$yqDn0ZX1wcfYBU;{f8e~QeQp0>#7MeY zw6`Ao1dlueEzCH(l{&1N(1%KWQ+cBia<)Q|wt99g`ACUId1CKdXT*`1gChXGX z!o1h<*0k0tI2%PbYs}$eGKD4$e&vbcR^(&v30);8!Ap|_A^@~%WCkzZ6v1AdlMOkf z_}@q<%qoMMHq90&G*h)we;iw~y>)LNM>=Ui*4f|bobbHnR?E;!H((4MI3tE7!8zu% zFIP6(mz9&(-zZ~?xLg^f&1dq|jT9Bh*~)|Y7@MnyhXrFy{Q7WlKJ&88|Z(muJn zIZ%D+DJ$bOi#?A#V)N00nYn8PWR?PO+>VF81gxJaTgxN$tx?*RdI)K4YjYE%waJI~w{7(-tv(>lxY_vG96@zC=xZ5$`lTPU8%m*o@gh}twh+r|>KmG0oNJ^=#u~4Xk}MX#s~Fi$`n365ztH1v zPeZ3qPjBxn74r?Rb2c6w*g4UTO*^#;ptp(+?6#wWwghN{226PG(qUhymW9WJkDkJ7 z`t?t9R_t5vue%!)VPBt_)V;sK>(&5QNK@HVE zc#KzzGbrN|61W_;4Hx?|WYg7)%uy&ttum)vvymU}SI8(F(uD#CZ2D4^2;qRhyAm|@ zg*4CE2ptgEA^YdVN%(iBs&MbFj(8^~GUgfgG2FiG25)kqR@L>}Vx*UP(;|fJxoBu; 
z2ptNqaYjhpxc_!O>*~nAd)|Dp)DefrUVhAFzd3E+{v^v{p_6{GC#4RqW7wBY4d1+vhn$ghuR{Lz^z1w)ocZ~0)d=(MO8C9wiuj|HLF4J^>9jjw0K>n8LZQ6K*jrI!&K?6M z`5#cn`xeEhHJOI*^c$O-bP|44!K`Ybs8M^5lcT+%?y*9PyR2&Y_c%DN!-4o@Y*Gbg zBm6dt!O|l`X%npKg_$jf<8VuGo%!x05*YnYtGU+cHZ(068QE7tTC*h7azCfT_WWZ{ zV*2&<_1iGPjA*CtcbxF?jU~|L$|2+x+~uw6wHJN=xlG8yF)vv_FlsO#b zclxw8*4Jb0w`PS;wz~?8i@D5(`FYKT4HaBBryH>Eu_$M{eW#b)ob+a&TwQIQ?}$r! z6ORv}^;JVcO6qE5WhIMh?l;)|ow*<6OG--IjJ4Cm-SLk%o7jIRCU#}(NgDO0HWXRS z?ZX-P9d}dX(O@@669WV{k!Vc*o0fqU*-wQF$yDBiys>vGfB>BW^u;3Q+$0F1B7!>vcYu34>^XP3*ok*CyoTyK6F->$x}PigrHQef{g{Wz{^RXRzZ` z@=eIwM2}d?&rXl1_h3mZ1Z|fuxfK-fQ487T9%sNh7Gf}6SvnHLBqRq*8Rg5noG!<9 z2)Kx4-IQHyGej}rQQ@hHf)ndAy$m;Fe;F%+xI==`J5s?-)Hn!OdS4fo`yE;3YOC5H&aA(KI-=&Ch z9isUyevK?Kz5Nr=Vw0*_I%r9qrkcma$iSdVU2Z<{Nq~c+{c1Q|meI<=p}1!wAmI8| zjW-$V7;0s(=W4i?;|foTvA>@muTqBQ$klK%Hg!Xk)7i7#Ro_UxWA($GC4++FS+r8s zSArI|gVMcG?!@?bmBzE6SNt*H%WQ9&S|pCuSeY|* zaRPhU{@<(NV-8EK9*blT6q~PxE2XP{U`|mhNGIo@6?N)QaH%biF1A@x4WSjZt}k&p zafmz|DnRlXw8e}p&&?Uw$VRlBVOD!ml+;2DY+WkU|H$3J^kuo8xp2Qrf1C8ZC9A(* zImg4pL*Vx9im2dV-GY-U8E(M*P4xKmboKtmR6_)eIu%y*q3K{SKw75FlA@%&we>>; z0mZ}2g+#fZ;x}Mqn1tzdgJ1S|n~8%);+@?u1N zDEa)t!*9j7p4!8n)XPczEfXym%2U_i&xDH(SQChDWPnQQ9Us)XO%=$A8~s3u&0`Vo;>C03tHS~wiv|fV0_nPw6`B%CEN2?6*G8iY3;7i@ zG@X^^3PTuU>^{K)=IGR2%Qqd;w=sdJxJOUl0h@JrWMpmN)++r9DeTX)%aDj>JsD#% zncJ9zoDXy3Q;m9(5!ka;e#YAZfS;Ote1z z$TX<)qyFOIA=KUWm4FffVN55P|DPS0IDw4}_Z!#@BBg30ds8CciPE%F& z$CDi-Ju9nk$OENp?W&8dQ36liydgF>O=|W%bIm9@TE##F{IHm+RXHvFo2SKvBZyjc zl5KNkX9iAC?<#(Je6SK~&^*Y%$f(vRXtP+It=AMT>MRJ`prWa%iL2=Z7DBbc0+sIVJNfcKN$Y!$oGHjazH5bJ&$gL?> z!!jv~Y4y=UeDVoW+Wnc@c8-oYdQGIZtHUo>2J?`ZiznI%ScYb(D?xe+ zU|VOj#2$OU(viFyYvpH~Eqr5{ka{9j=7)CBAzO)wi3SeKw(K|-)57H3VK8S20K(V& zB~A2p?Z?b`2fp!nDyAwERp@{CA_8$gzCKZffYjRi^OPGyD1=&-va{od=%quu95T~O zhF16WkufqdLi%VDS}UVo7fLp2DbG&u$TKi0=A{*}W0TLG zP6HlR-cBKnQhj$IsN>7tY>wK1T4y48N#ILZlpi`0yc0 z)M{{6&^CJLLdC{|x)n_i2{yuSZp@L1*O$pm_N*GNuQ;0~**tA$VXTph87Gem(Pk z&lRzMdbGB&yQ>e83%g=!W@e^+4wb=!96k0MT=@8$h{&GB?!aN=&>%x=CS8;SNqXhqibEcZ-FEg;XRj 
zfmg|nt*wf6ke{zQn)lVz%;fLs%gJ2_ zdI5Ozsr>ZNySn=6VsF|J+BtypGnSqRTPn7xw{(1qkK{yQT|>XEIt60l+-Dv?K{RIxsL0NTYm;BI~H*nn>iX&0@DUL=fn< z_B1uVSHVn`FxlU{yng95g*HcU*Cm$$LIMSP4>7a7(B*-D^ZxkKQq$?lQIXxoubJYt z-XK}ghQIDt4#qzdGAShAFf%ioZVK}!({k!a)-*Tgz`?N;tV}mY2Jx=t{j~A#EsY3cn+cC~FD#y4dtTrPeBjo%xw;vd!>b9ivPz|8I z)MviP#?xi|V}89wh%Zs5No}p?xUdN=CMI^gjthx~abjWuJEy=&+6$0clT%YFC3c); zWo3I#RF5A&_PNFR3ATHV*@)n;U%$Tj`xi7P6c~2Ub)_u;O&}Mx;~-&_(FHbQGEyA! z>B5!GjXGL+Ai|O61nv2zYL>Ij4aWyt0|iK0n8{#f`LEZg1tFg04Y&t!kc5i zr4NiyLRFPwrZFVvavxv=R>HZSo$LjM=UwE8Sevwv6KQ7xs@xJ}W{DKh7n_Cs5CTbO z9WI7tAYuWWSb~l$KOt38Qo2Y;BL(D~ww`}A9*8Go@}~|Cf-p=FKueDa8^j48dVyHg zef;W`mmZl9rD=cWN4P}@LFqgrLNgUB_iHDQ-$`>oMc^|f1 zD2GLmXM;0kD~H5oY*cP9wS(5%W&qIEN2^#)jG13gyp9sG@hj}-A08fIlKX|Q(Mpx< z>iaRdJZX|)d_d^s<@FvI+|%dJiOI!c^+EkWx)BRY5P++k74{sQVhAUODGzR3WSM)AWmwSoiyglqnf% z$*lR4UtnwI-hE-O1ut{@3%?B6R(ai<8Y67P86H9+V%<*P0 zwq-(OlAdLSs~=dsj(v)AeD3(05N%yq+EIVGvsmQmz|*tx!>P&Sv-4v33DS|Zvn`Qp zg(r<64=xjy?}&ulVPdLZeG&>mVY}o{Effm3H90x?-hOEcxSvY7iwG>^FIlwcc|5$a zjs6IT-xqOkOo=lAJOY7Vd$NNtrxb)_GQo^1%gQyC}j(lpW3Iwez zkY3h0xY4>Vmq~7M>-+rO5sBr&$|GfE0cU#Vm&|XI{0cYLdW8q$+er+ z8sBn;T%5``?lCfKtp=cw_A#;Ik9jL8D#Y5+y(4P4nN}yQm_lOvaIBLBA%C5U$|NUh z%C^>@RuoR|d{y<6O7ms$Jdi;py(EOY19@Id$RcsEa#rsO>8(7NB`5LoHadjZV~& z;j`Kpenyx7JRm&aMUYaYT!F%fPzk>M4XGI6i1~TLveUyQ7WbiV-!1_{r8Yu>tDZx< z%77Y3YuuV`X`)1yRKi-@0lcQs7z$fVp0`=*jaTD4Y9p&3!J43fb>agQdY277^df~2?qM!-jVC^ok z$o-?l^2=%@wQfnc^%}PeI>ML;i?B!eQn-VMgDhLv7g0!6<^b({xMyy2LVhaY!?9j!o(a3_W4EltZG47KCZN~ za(GXcLW&}%>#37b_n*h$eAWYb1vglDqngBNr$_1h`STp`OPMHsi@vGr#y^U%tN(nW z)BS*FTB(;^UADWGsWUy{^=~Pn#oXkoZK5t5rv0an5#mZ;Kz=Cm=-f;(NY7sca3Xa> z3c7ywj=YlLvUy)GCfB*BuEpqK(#OOC#vw)IT^@QKN#nwqa`uPYha!1KH&c|xtYvF7 z%M;5EE<*Owbly?Gu=^CL)N%#-?2>aGW+f4}On#g67nt@ysbfKlqO&CmYsyb& zO|CA;!1gn2Pxz={zdvYf{HCera7@uzKPLh>pzfejb-poSm|i~LZi^HHiRggy3*m?t z$MJT@xH5tN79Q@nDS4rqAwss(%2q zyfRuC`5ANVO{VVS#`9W{tW;c9FA{cyl-8uSJq;P$rT>bBP2n6gcW^^SyvDATfl)Q0 zJClC$b$BvnaTpts4AMV>0wJtw_@}|VW3h2k9<&vi6O`nc(rriT=>osODmO#j=T%q;`t{S!Wt7^ 
z`Lp;#ej%Z;oucWDOx*_J$Ox01foAYv!^K=h;XoLGGIhr5xQphdvP<>#FH&?se(J7` zmI6I_Q)D^gD70S}p#N`XIm^!M{3K`+UkuRg&#d1)w0jt+9T*-ZWqp)Rc{3*2yl-yb zpi}I4jPFchg3G-@o}UorJUob#YS=ztd)vCPm#Jw9U2ay7o(a&}&`Q-6>Crww@*OO^eB4Vc@tX%kNpuU}c8S_qZx zS{zZl!K&09U{!LskYb{i=lf@!>96?T7cXB`GGYjCF1`IN(&j>au>P-#P=;u1cAS2?;`6?)T@<=+z;;#PS<0qd=`Yh;tE|GUs{u#y5?(t@HuQ{6-mv-_eFceqvI^&^lg$7 zz2sC7-U9)Fh|Ek5U|)Yf;^KYt^J5SZi59lslmn@yns4F>_EG4Oh|S`S9D_D}pnBD{ zwUfYPAPu2O+n9^Q~)C{Maf7)PQDZ!nlk2#Kmv%n%si&OyaAXBgT7P z{}K2J1nAn?_{g8x_9r(>$vAQa+&WQW6=YgE#SNFfHxllDY{C| z^#yjC(C}QXFC{q)11!iJtg68fES3kGdVVw_jquOZzdLZ){7?ZqFzmPK>JbpFo4}T) zz}Z~tfFV^{C%`f3rVd;HRjvIX1AlxozoKxm@BtYz_^%dL~%RRboMq}1(rb&t5 zBjj5wk@ML1-gt`smftNzWb^4@YHsU|e^`BpSf2=a+^`y0sIlM$L;deIROqnEs3H#v zZryJ^4k^L>hU!8*{J^g(8X6jAdFDK>QU8VT8qCWWGd42=YUQ!NZ}%$>S=c8N^$s## z$#$pYbYr~ZiY9jQi5h-!*R@{H;yFjgM<)5L^|~uF4P^Zko($0er*vw}EhIDLUJt+J zY(s;$Mp|EcW86d1}cso3a`N3g{=MHVkM=>;y&=_>$tF9KGfg_ zQ~N?4pEQYsipw?xZH;(qtW1Dj7FDb}w{?-2l=wO17s)@OHuK$iItREo{h4;S+IO_K zPvY2Np7xOAmSb>|i-q~t;FRv=?^IP);rvm~qJx!4)xd;;+YgxvczJ^D%AlRSy-MEY z|5!;d6zo|SgJ*74fE9k}$`x!quCQo96GQtDtMa}-yJIDe7^1&}P@q(#7h#R|{LDb? z${7Q0R_6Odd@m&?Ki62X2G53L(Q#BVPwMk^Zod|}M%OkF>xe&4nM+SkFGzERDklO2 z{lY*FBh)kBb^Ln9qFxvPfwTctjvU85vlL#S67&E9pm2$4Y1?8%9F?=)Uw~>+-Pjl! 
zbhgXc2`X73Wp>@9a9E@TD>HQ;1114)tOmU8p-{{b&@8pEx!;%8yqVObB5EUmNMB7a z={aLX===S6X(Gk)Ie{QM7o1Wsos3A2h_av%1CFSlxA&Edl1;BCra+q!*pvn@r_+PE zw}0e5f(?reOcG+^zX5li+~P610hLw+wh)L7TdZ;6bg<*t(i2iJTI`b167|x=gyr+a z5$lZj1h@pvn}56KlMf!bF<*vnO5_Q7Ls`&awo&c zoh)Z(6CSjXZS@dPB|eys^M@+*4JcZ^+nB5=Ib2K$wS~fxUQer~YxSV%`rySi|LAJ3nx=)l z&8FZ7Z@(e+Tc;!Y@T|rQ#_Fw{ZQq;<$wUcB_@AD5`uopSg|d?O{&KG}owIYu#;GT} zUZK?YIZt7XfU+kkcbe;P;TWFJJ-&8iY(;| znv|&!y6r!(JsEhcSFYSW?4t2@9JI_^;;S$zoTd8fU}Pvw6sAC1g>BpiithrllkNmdk2q&1`?jv~J{D z4n^~HxiV2%$!)+-L!x8l)Up#To_ynh zZIg?2hKL=ugJZLwGSB=@e6j7!*2^HSG;YVK=jrzt<>Pc;z8catgZLq1` z6dK%&9($uHjMN;~(1<8AqfS!FILCqNVj%whok=ZoJX*_Vm=9!8?%4#M;@V(MmtG?W zMwWKiEN4YifA2>aMrciBj|vXzwAnUFuq!q4^HmB?9LyY0JX{oPx5x!#rf$AXb~%JfI(S4qKEJe!N}dZQ|u~4{$3(%Q=J* zZAaI&F|X;!kL?4hzstzL=GPJ!sHmr(Gmy+Odq|-u5mjdXQ9%ebxi~3~f%@9mc=1gk zTMpx@iwRKm_D@V~*?<$>1b#iuWBhF}-#59&duu8Z`Tc>Ttw-A(LYZZPr8sW!e36r( zf`TtgY8&QOIz8i+SEIQ-8t&XV&M~E=frQ4c6+@_+n|aJ-2%E*jqrrqXaxHh`(^5wN z4Nzc8Tc&4$rTdU*e%Wsg^<_Q5dUf#nFI$oR7j!fIpHuv|y!?M*e_8bmw6RqivTn}g zFk4yJ&^tnFRod&%y(1X;LhBEag3g)_o+FfeTxMMimSjxkVqc9ccA*&xlz!0<9&#?L zS!a;~tko+qp-Q7dFqoU=C{?QZL-z90BK9e;ux%leHU-aDge3^5#R`Ir* z#638HKZU!oeX1_u)m*3On9y0dNRddstsye4b^!@O?B8Al6BQE*k8JsQi? z#)TWxvCGg@_w$|UgEk`_EC~O;Btr4<-NBfv!G~rHuSu*%!}r_=D&OHY^^?-zW`?y? zN$LDU{zMDmtTzSXmQx8(8F5>*-@djsBKh+>J@$5fGcZ~i?wRSIY+m{KKykh5(*-*R z$Dv%wZ{AZuq^vIrogx^a(8zh`BaDxGGtQp_nvM#ecnrljc6G*IUFju|MIsNSW283l zZ)V$8!i`yKF{?L$E6wItPc3xosD=!YYriJqF;sS3i3cUdo9N{MR=uQ#1|O*O#k-!= zlw|3Et|BxUG}SB)R&DvwVO)Jl_+OuseSIX7%XEKz=<8BQR|4nelz&5@5Fc)$4ET&- zZL2_x(_DpF)EyZ5&aT!P<+R||z~WPVCIpOb|%xy})FG=jJ+}&MP!Au;Um7Ro=-<5ci>STX-wnH{>?i>|OBCM3KGJfA? 
zeX;!mQMb<+R9B*n@nPvEetD|O6S>My1uF)t;hfNKQJ^mUvZ3<;cXK*RZPmir*)}=) zbl3fBHsrEntC`6qa_;rbot91kz`yCrPP03k%FK%CRMMKxA>Gruq|#A#Lx)5|`5Q+S z9}6Arz#Bu~D!1p(xkml|v>~Wj%>FwSbchZ1>T>DXJ6(7c9BaPNS^ZpsTW`LdrGSix zC=gE)Vnc{rl2e98*lwseGL!{pJVaLXr}j|(bR(Kp357y@Cn53l_2acR5go|kwmY=O z54m4+yfJm)JpSm(5%wI8|IT6HM~$25(9Xe4exy9t9N9P9f9QS%C}8b1KjWeE*M9K& zexjG;LN(uFARvFgF90h&Jt|iiTGv=Pf>Al_(?Yy8Z6D9y*}4gQ!YfI>=*JHuW-(H7 zd)X(*-6#QaVP`=Y|GQrHa*hMc+Uh_NoS>Q?EJYfXD#x8{Cubg>ka-fuZ^LELV8g^X z&5|#cZ+Ds=8TRM1TGjbzH*L;Xr^$N|-+W+sF!>DH*`Dqgpl2$eL<1dSvbAWU%%hTR z;x5-yaz+CK!R=LN*;abk`n51biQZ&#ZWb*e0fC8?If;SAo}|4_$%iIGq>PC}XA`yT z64$=6s#gEL^Q6Dj;;vdKJ0t4jIZ%y}ics5?NmJe1Bxb>Ub=v5ZJU;1?g*Jkr5qUN2 zM!&|erNKg>VZIm~r~}h@7CSqXFEW1z04u|OOUNhzTZMqvf6IU!BhSlwu61H=O*UT8#_+5|Vwz2^BEbb6 zrulxa*0+Glqe1umC;@wjXSrRVN}L2qo(^8(HQL$EgDu1~n{vSTb6>kGW2Iy8c{hu& zNYQh+gTy*JL`7#e(q{rWO!v)s*)9OsRP@yrI#9cZj`O;!=A-N60LCKU2|@SE!vgU9_`PPy2au4a|E-s<#kgLY2lP4;V*Txd~&>n=thnV zCJP&kmK^W5iGh>Wnw$~*`<~0eG>Kx{kDA{Z<+D>AaVxt=wMtq8(a*4v;1TpUPncL0 zwYF+L;{5X`)j$CuVB>=DJ2!e|++3$4)6-}^e_SEk>&5Rc^icAZnYt!kqo(6xxe9R> zQ%sZ1ZFh9=xnmRd=}XOFZ4+C7 zYV{}a=B)nFCO9~Vr(>BZb-wS@!_Xow$NA3BOhy_b?n(ecmanW*i@9zSEkK-{2}{)|$>mtvbjba++KNiRidE@F3u zStE_&w8+w8wVN4Id^KDvoXeU?v#cz4e)wRQ>;Xj7UPv$GGe$@`-A27W02gI0Qv|l@ zTC(rO&w1Fc>5ngESOHN$q>P9x0}eG7gHbS~A-){pxO{61dV>8|h7PQoZ?Q!QR2>me zW;?e~L>6Z_9dque6@a^SXIMm}K`Hf?DRj%SyPQ$GB~^V5j0z90dGHS`L8j73ihR8m z@6_@mO4RZT`%r%T2#IX_(tv>{KH*$mn8-`NFT8e*4xz4O4#6{hTt-ZkYY&g%|LG5p8Rx~1i_@0qn}`!p`l%V zBUHzyQEV>28_K&@D$!bE&uxm%x;Jf0$dw)=5LBv`-yf_6e_TdtcI(D|3MhHl`>1Za@0k7g(^= z;|<7%xD+82Hc)>0YQyX1w&OZVS?6OVs2vAWOz2*;mmOy+s{^7-=@HX7zT;1LPQ$e`D+efHO%SWdS} zkN$eLL3nj~c%h4GIL{k!2S1eMr9Z7p{hloZM`}s}?d+m|pZ( zUe>~{jpiEkM(QfxiqX^zPScEL$S8f@k|2$=Pv^0kh5obFAOcF&(b@2bT57@ZiJ6+R zb~#`i->;K-cogtmkPN-IA7NmZF3%XLlpyvx9$#9^H5xXBpTzTQ_J=nJ+1b5N@{( z51h=?b8Wr|7t+7#_~eN|F_IQwoRMCD{H)?pV^8YKC9@GG-;+6T%3PEb7x(f7|Ko^X4=vf$nRb4+WJ5!^}m6ng&)F3P_c1xq#|O)U_940=Y5&v z&tU9FX-sEjS9Kbgp355YTi{#}diqjyo|C 
z{71Rt4GJK|L?z!x_PKv;RZ~-D7Pbj!1D1v6vfii9rof?d{HPn`3pvPIqb8;+KNG&w z-K8EZrF6S!I;7f2p+R`yvA3KR9G~!;BOR%AUwG$6o?32U`R5yu`B=J|^Mp5}(%Td< zg~g)dfyc7P@sfyr-6nnJt%ewCw60Vv+M1gXo(#^O4Dzd7jWW_868y-J^LG}AI|7} z!h?+kHe9^P<;`+evl-*w>Xxd{Nn)ojes;zK)s>>=y^4~RZPd`p5z($dlsGxtA5;mHikQ8K8b_&q(Ww-EDXbIRhqTUA0$a zqPZ+t=)QWETYCa+!*bOx85&I~P<>Rx%g@4?@3O4%nA8J+mqiMN!Cpu4_+jZ?doD91 zVxOVHj%=!XRrx(B1I3o~P;(tOi1}$5E%;lbwDG7-_;W_N*jQ@}K?Nv>2u{m;VXe`3 zLEq&r4Ft`NIc=raGQ1D|f6?C&@#(xpA$C$u#W-i~L`x{L#7FE9GB@z3^Ko8jTB=gYfDq;$PO0ytFemh1vm}yw%(-SE&BN^TB~xA!q2=A=63`K130H! z=GT@7hI)N{{qzUdCmt^K2LjJTaO@R~VWC~~?sK_P3mu1Rv@)xoE<|uU)?;^f@JJ@v)Zg{^kVE{b^~g%Sc(-kUhGxyLuIcLCCWGCJc92>$mbxdJC`Xeo znbY0(l9mkn9vZ5z;IeRR?=s)(Yn3Pi9TViQlu|+$cz(ynbIy9RGpWSm6CfAM$4O)} zLED1LCa5^*N*pNtKmsYpdb|rJD)8tfci$!&HCs>g+-d)gi-4tEs`!QD;}cJe`A67m z>imwRe1wD!C)+`KiGI~z&VSuotm!XvZqO>>|9G$&c%7`SQUg;+Md^*oH*pfQO>GH9 zHF$bP3EQ@YZuo@ZHc*_HgtTu z^p#c9L_Wi2Z}sY8e<0xx-guxWN8(Rj0#gWc*yflg0n{UkgOzpF9QeWWVQbWJc{JBE z?r4_?HF1tDJ$?GVoSSN!NZl5)?1TNE>lJ-t&=!WtaXqq?p%rbOD0Q^3w4?(R105L} zs!@DBfDy7)n_4ats~}4MYH5~E3(Ep7R0fQUY%Z3-#jnN9*i z{xtLgJR!sobq=2|Zkvo!DUp$3re2`={Ncp!nmgmE1g*Xzw6s+Fl4~liQJd6ueAqW< zNK>s@%qVAoBasFzbj$5`*y!(F7t+HYaCR#E5n4O%dOG1B@?gMq^BzJE|He3!!t9R* z3@q}wDV?U5A&GCvhOYEKhKkose6m?@Ueop6^~|G)v3>g+tL-&x4nb^%XqF+w#rRxVwgid<}T^_GlQVZ|JP zhS*R(QCRqkh2w*To?jO-_zd3_xaj*ui8Mh8D*GXePZt^6_}t915zCqVWDTMFH<-W2 zmsML}Th54z>NYQ&Z;!XoVPjItS|5&VIxL}=Y>+ycz-M0@kqoCZz0QDb#Mww~%J5OH7$`%aGIqKA|Z_v61h~0Jz z?MVp=jcKiSzmhbRNEnu(o;9TZ>cu5#ysNjNCYhj+d|mz5dC2t8FsSNs!n5X2E!n-$ z^)OjE@cGRL#L!nc;&SB6JLYV_?e{rRR=yO4Jk`X6M!C2=B)o=grkVYW$L%~KDB*JT z+L7Vk^FIh~ReWC=Vvb;^W$#UW88}>|Qff>l)*LZqE(YDYNgKds2g;%yW}3FFBe<3Y z9fJL;Jh;aX*@gU+3sCGg9d~t?JLC85?Aj+A7_oJ;96qz(wRN;P4s)$SMN$@S*aoks ztDoDn96UFg!niFb#6_W*1R++g*1NXldRCYJhlxTVwLVJJd3|I1{B3v+ZPDda;4Udk zER;tdrYN=~tV{=7=Ov_m760oQ4nH)Lw1C)H87)m6Eeg7L^TA;5fz^(YBACgr#;J`S ztr1l|J4^NnZ|F}C=kK1&(Zj{g+wK#O>~~G1&(F39N6?O~32kR;IbVID5xHlw2Irfs z)NgDp)M!rTP&&HNA6cr`r9YglcYLx@m+N@=A!QRnG}CTLv61Y4wb$d%^&uic=BA+q 
zr({|j23-s;YS~}RT7IYuS|6?O!N&Oo;p+poQ&d!X3$q`@EnD34fwS?ois_z|1B+4N z&AH}=UuDi|`tBs(zxS88mRC8m_Gh~r+*f!ngBA@P*lEYbGhCkuyBfvo3%fzhB|X9u zW9bfSD)IgmKDX#ea9E5(qoqlWAGI)_`R$C8Z4!@KznYdF9&e=w1#uBj-VQvvjeLYa ziXE|pk&uu(L`F1!`F)RpMawTJh@>2zT)y0EZd$q;wlau=#S7aZ!o~K=`))H(+cfS< zKpbNX{6~0^JVv@CsTm3J8&y#P`wk9V@j#U_FGBpjQ{P)H#1wPsHxOf-ckNaMuy1c{ z>vjvR!2|7BtZ7I};x&zd#bZ#$JuWf7gQyZh<)1fp6Be*sucAb%r&v|ZS*&k=2z7U# zSc8u6Xh9VaLeLzQq>vmu!a3%7r?22O)o4*pi{%w0At9!`S+`b~i*s|OAT2cQd>GT? zJ^dDl9D{AKv*CRIu`u>IDHE1bExXG5%D4}c-+IDwSZp5KlYadwD9mO7svU%g^QOvD zuU%mgr_sjy)|whURixV68=lO}!YaSc({Ik^37o2yIv`U0zhz>Yv88!lJ=y6dyo!Z> zWz3w(>(XLrmHwYkqP)H1Y$Ld)bGf@>hQ`pu_FLIU{hY)>L8gA?CXDyO*yP;2+^>+J zX7Gjlsj{f+f@BASsi8waS?cQJ(^K2ei(8eIx_i6&Py?%0FFs#!=zN4QRLn~r+3DHs zO&#-;pKHrm+qYv)cewOkB5`@R-Kz1=Ge4aC+Dk){(T+9UPoe@8aM1D7nPFm(KOYO?DPiRVpG@M zMgGoJeHz=|(F6_GN10DYOA0iePsF(AVIoA{ULK#A4PD^$5cQ92wO~X))9*xbnoYG@ z?s2)8nZ`KaXGFS86usU74XX9TA@yHvbXWrB-x?mZrk!h7@O&Q^K#24$>7_O1$-%}6*{({v@J8eAJHqTbkL$!H&F$F`leE5JwFi| zTrc3_3Xo7XB`+;`B6FcVqJQ7+)ohc2{rqN9XFQH_i7Dk}+fSQgXQ#${>h{McINwUj z{C9mJCI^CN2rk81e>@je?i4XV$;)poQjk5hu8oM}F{3y|UyAQcTMhScNu{NMDM;m>8M8F_#>Ck3dFQE~sQ7pwZ&MQBVe@ zq~uV(evXWU*zlx<&(`20ibk|KH_vz_pEl2sNE?q(wM_QKtI1MM9q?{S2L8Mb4gIdG zzJy1@piHBBo*!BpTEqR!#90+$O8u_rnh;Z5Sfnw$zy1%RTT((l=yeKv$ zGrJd7fR_%e#1#Yv9el5WYP2JOLOF}34*=!b z!w;52qN5)=zf_nE+~U%?NTQUQsq!@_Ah;TxqG&-EG~iey1a)C3Vk4a3dnbHO=V&oG z$%wI%Q^82OyDDWmc+WD*Z*trbh=9(H@4?^y9<@ZOmF}!*E&vmB^rtmgEI)gbZL9?S zHP0(69swv>%r}!?LyJf@E%vOKq4$hNir=2?>re!<%R=;?j^uc7XJrMmDhJ?D3+jHL zqRlm|OpX>hFc{)}RdU2e&=^*CHT5dtoaK8u_U=F017;jm^XVG)i(RthFr!VV<=9tQ z@2Nx`L!tKs3l`-A1LB#^(SlB5j=Yp~bW9^@l$G?a%#Vi!@}EE3fc_oZg4a%^k8#Bb zsor(}diK`o%-Ja9{H75FWWoS#I&d+wp&uNaj%zlN(ueO>4xup-C zk0X&hIx7WZgUjZYLht}!^Vz24bL$214q!nPq_x?Z91t&c>}zFB&9Y*7gH06aBHxM@ zJZKHoFuc&-#R(^A*5|f5+Ri#^LbxSjgdX4#CcuQM%Sh>N^rpchqTDM}{>XzZii1|6 zW_a!sx~s7HLf^E%Vj{=jgYwUFXifBh?Nhf5cFOXz7+`SO4t;FB%cIf}=Prcz=P-WO zbncmOiMcQTRYUBoS%SU9EisZZ1!{5w(ECi!u<2yZ(ItBDh3f)7|1g@rRky6?E~X%> 
zqE(Os#AeCE|9HVPT<$`sSwriGvr?h2;X79zB3&d}{&jKwe||*~2K${l-+ub}(4NJ5 z+}^DCCtv=b7IFtR=MY)cLpK?KMHcWX*dgODtRH?@Sh&#^AThbNsYmibqmn zD!ezxM^07mi&1yT?D?CW#M+JTUq8Ep{xJSqi=bNBry6J4(CD2(0E2M7v&l zROW_&&Fv0fcod3@cPmHtvb?ys@^A)iUQ=+EU|YH4?t0T?&5gzoZQqa6 zD>_}LC(Q%7p7bY2Q;VDBx8#!yUkmaYzC#T&wxJE4+3^FYzhzczHj9QynV6c;mXBe- zeY*)ABs}`h38=nf#>R$CON6TVzahzmZh~N0AFlZLg<9}w2Do;e7(N8BeGm-^vXQ)s zIW<77N{*H?j?bcB>dC*gSyio}d*)`G(>Vv{w{eqs9=7qkTj#8G?z(H8wa&fg+;#6-_peq9-|zP`z3=l5&-2-I{ll2M zyiA!OLhqMGC2lH6^qK>`8UCvesqf>S0i_2V+J$RTLDyFB<1i`L&!7L(CM0e9<&XjE zY6Fou$(;f-UvSZL4t^7^&7G(ivJhZP&dQgJ`Egg~{8{hfpv@QxtjF2s#u~qg%*p zL~zv8*E_$~a1*+qMcD%kdWXjZe@F!?Rc-Z$(8v9J9;2xpF_knv+iOr|%8T6)T`ja8 zRg*koMnU(*k-g+q7l$sgrBzfat0T33e6e*IG4;O4M+_pqsZ0)aV&kGk2wkCAtlY6f z1+pTOop6?^ZWiBaYyMTKbY&!}H%?wo;ZwK3usw7~(m(I|F|a&t4txE#s4tPJ23@U2 z(C$dNwPS}2c%5rq;WxOj5D8Vj$JD%3rPHx5UIkIRDEf&v;KLwD}&gP=~i4a zyrRc?VeQ^$Vh${lcB-ZQi?6To_&DJQRFGSiKtV}i)(&k1UKU9(y ziN$ir#E_8jxE%Bt;B31))^%+H!=+|>P-8aOTb6|ZsOHMi(0LK?OwGs2?Y(nmd_hOT znqblT(|J}7XhKlkRvV#WQEKXQTynCWV?xDZpr-&vU|Rn_HQ81@Wf|spxo6t%t!L)s zxef9I#>s^QQFZiL$E~d;rJCTMIqHGEGrhpo-Deptl$L=^85{IsxBqg)>l&y)H1QFF zW9#rxBChy8Tu2Rp>5X$7lF%RU34So}QYEj+6iu+Zto>u8i{lCgaN)uI=0Y3KHmTDz zJO-0`3?s@L`(^}gV)g8R`k4k>!-67Bu4TrUB$V=H!)!* zzmd0~D61n%V7pT6bU835=OMPrqUBMVEVaj7yB1It6BgyCA@X2pB1@J=U!8y`81WUS zvN|~mn{vD%2G+s|glae`C`@Y8lRLhdKMZW4&+`;$-Iyi6m;3S3Of0v`bDfI}S6I&j z9QS?w>c`}44o93sxn>DfRX{FCmk66R8iUGTsrjj?v9YamU*818+C_;ZNLzXJXD3RT zocTB785uJ=s-%MoE==}D<|@XT>)2_!2}(34$Pyux@74>5$dB~m2A%*gz-FkNxp}&o68LD^54i;OdDRo9xV?3)1|SwL zKF+e$cI_*5CxITUEa*V$;?xXwP2k6a(JBX_yf$|Rjqi2QxUwwBcmS$r-=veyis{Hlb^l|?JOPHgM~LAZGB3JI1IHo|e(c*gDFULObKJxVbvp@O~9FQA8uJ@#K>HUbt)MCh2Mv zd5^A`DfYJsAq2fPIIj%*>||qDBe?%Q`CTF~=&Tp?aL@YQ3ga8FdA1-Z!4SXh zWHX2Qj}iTd2h4_lB_XI3RaDX&xy#>T2NM=WoQrMt07j5ZP#^!{IW$=1KZnCr*J2Qa zhI0V^7u>8NB5UgYc?a@u(vcr8DY}=f;Fp~cpAWh+qPaDW=l3feWT*pcBOSMHNG1N^ zHmL+R_jcLt3RAM-KZh`jjY5V?uMSx!2eU2jmpp7Ui@A1{bOgVUWXS}_n2Vk5baJ#4 
zW>k1?u`=#Uurt>wl<)p_r-;KX;=M#NkR$T;(vwNvml=_j)kaKpd#pcO?NV7`rWv|3lTCYJw7g-~gqzo_71XIG3{qpOACj0|kgczAE?Wdd_iJShnoctYia zgk51PCnrL-)kX(oC^+I+w zWYnsI5L4DJl>I(Smq?)C(9$6Yv)MjbviPLLl^dlHW4tkyJU%%SpDHe%BuGOIsx2;m zo^pf;I=qC~OGmO8!hQf47WkG#?HOHW!^0HS*lDQ0xHLw55L1lg(kH%)25`0~6puej z9^LOR2#cHT?aVY&%pB(vuqGKHj6B6)jQBgDctU4`RY+xh@MNu9OWP-5thmgRXyMAs z55)WJJorp&d1EL~)YqG$jrFSI4=aR`o-5N}dPV+{w^dJ#i{`zKGNXzcHDG6=S@GY%^Q5EiQqO_8a8$4_mJggn7rs0*!Z z^CF|BT4})^o1r#Qf^vsND@T>PkS9%QPT({CedNaaTs>dRjY!A!8;~WqgteZ`i4bQ6 zKcJa#v%Y=~8k)a!X=JXrC-Cz?{+{IIL{mM6_S%5hlm&O0bO!WOGc)Y9N)U?!ZJ-qC z!bAA(Y?Z`v=gl`3uL|{4MPdL5^qsBU)Yyf!6+F-1U9>s0#HIgL`z37)9I-d76R~UmXj>`S>)Jy-kC7h?1 zh!q`$q!h>5^L2$xA_H^0tnV&F5xDqc_3W#XT_7{H+~qY`KrZ$V^aR``M!x?WEm41W z;D03J+o+GiA$va>K?=jS7v4$&{iy+mHIV;Lqya$vzp8KiUthh(zp!ktG$j~HSJx`d zf@l|3EeA1G`54oZBqkX`$G*Lm;c7?jN&C6QYB7ExmP38J^V92~n41S~m!_bj)cq{< zr%4$#$CukWSg&|961zO=5)tlgWN$GEnVBN@@7Fvjw7sZbOo{35-$sUph_r6ZD_(#6 z{UkZHZJ{O0>9-MvzSzK$lHR?GD`a9h@aLjKkdmR+!xrb9rgsg84w(T)U3i36PMTT{ zF4JYO_}8y@622Tq$S82LM&f6u&H)+=On`<&`}LW!m-VK3CA%ig@%3&V+mFgT6iaOE zP#f+$8>wrvo1Au~k;(LiPU-6DrC!^XHfhRCPc;{7QJ9@gyEMac3(H|~k@*2ad*LFV zw6t|YhdU`NEB^}C)n>YjV{siZ^%;IgyKEr$Dwk};0vgv7hO$c6ZctZO2UhGqEI#Lz zN?g!CEytT>S;Xq|%xEE9>#{RYCh|($d#1f0i#n(DOvT&FS%MC%l_?G|P`RFG+HXYd ziyY5=a+*%>%QF)X3R;HGCb6GvGJ-d&mG94L8~7}@N|f_>IBDtWV>#3(EgO(oRjGCEUd&)%`A*r<+FtMQ2m$biYngv}^vXk5n@@0oY^rafHU+8FGCET8H(ja#rTjw2Ox z9-G<9_Vx&JiHF1#Su`xq*_m$77t2Nf!&v_Z#7o{`o4@d18bw*TI43YaU@Sl-R_j{^@Pz}5$183yFWA}YU_@}mK@+W zhb5{zJ7c-g?_~)q>zo)hb@e`!)%vYLplV0kmD_~n=iekd#`#$(E}av@uDtsmz5*uK z3W)GINJB|3{a~ZEkCU9*ya6L4lmC!QkXOXqjJ` zz!-Bw%;ZE5NlOct)530)CJRJP#zbP%^{uTf^(-B}$||ih44wgG>l7~C24{m@kMdJ&y6l7kRXHHn5 z%X|UYXNVxO_of{FgHHlZQUX8TwcO1pz zXP2I|CW`$2)2{D;ScSHFtwAPPr!^XqeS*cEgIzx1PBaMA-ZHcZ7073L__LE}yOyC( zGKGe5R}_cq*Dl*ufu}xraU_-F@%IL%b+%s2O!z!- zr#3*TJRIM6jkt+2x7*ug=Qn6+5tM$6LMylxFCQNv812MZO+M&w_mB2l(bEf{cO6N| z$i*=Mr}zlVvKHdIOWc`w&{8CjUN=ND+m)?y>~ zBH&9u>r2|EKK;#m$Et@31nyn?89~^c4$@=%c&pz1Q}?DjRQef9BJ=a}@t14O8f(og 
ztrr%OK;lTUAttjC=IFfmt}|$1+``<{_!FdtSa2b}2`;EnhZ%w+_3WS>=*PB?>o@`44b!3{^3KTpM}NA^dRlf%a!iQ5>$H*}|}r!U3{E}MuS zlv1gPF0+9L{pG|F59bnu-_3WouEZ5=E-it(ssYRVsyp+5`r0)MobQ1v1Wus9$Y;^Q zI5zB?e&%BFTR3|(r{9A7Dc=WirW(TPHITcx*8S}+8R#iTEMPG!dz&_d{T&z1Crf-U zUK|Yy9g@`3qja7{8>7}!;4ln{fz6lzkF>NZDZ@x6uzJc|?5hSGi?9Y*n#o?wGWV(W2 zaY3{Rn$YDk)b854XM6AP-}c^q3O}yy&g3|I-F_Z5bBe*G>E}baIt?TzA?%85rM!Xy z(a^(e!le?)YNc-7C1_@_8ty=(2YLW(b8DPv>*zKEf0J?LE9yM2q%el5B|t8>6FNC` z3#h;-oS-(We?yZf=*ae=5Vzf)*4&r7m~THS2m6os8*tJYCD5@TDgVQ=lW}`3PRf31 z)zw)Hw)l!Yk%lMiUeV|f4=66$TOiu3C@p~;F2=hH4fiIz|sv8s}BooPcjf;V>p$T~-lD&Ke5i=lW!5yB{21L& zAPDXG`t32vl$LtIH5DhIOK=Xim(wmqOtF$f0kiu4YYIb8x#f}H^Zb}>gNOLOTr7Cm zDDL5djeK$6eVvhwm6iNK3mJE>-J_oaiTpZhKPUzGf7A4&@;k*~KEOzEN?%Sv0jRh9 z)y>USYDr={D1nl+LDf$8boF*gO!h5kf}-A<(`e*#ngvESK8 zDMA^8>k{Gp)juu$IfrFsY@vBe^;2~eGS3B+T^YS>HVBokJ1H-8G&s&}Etj#e8>Bt` zo#*DJ=Xoht2Q}qJz@r~e_5U!U8}=Qh1LR)h=)An%Ebq2;^2`~oj#9#o(=>18Hy;?C zq4zqa=Vo?QeSOfot5)x>$Qpz*4OT6W6C1tLELwxWnGfjeN7VUuG45gRNi*MXP8e3@ zn@7O8`t|EP>lFdr7*OTQ%!B_l_GV<%^RJV_1560QzWZwY7_08*&2KLs@cw%<^(fxF zxw!LdI|KhZc1ZO6oo81==)#P##g>dhc~NzB&8CfXa&V}YD=AnYEC~BIdv#buUkHXw zz(k}|AYbplD&ziVZV1{a0&Aqv#Se%<*>r*qJ{~bz$pPU}+})cTKr?Phe7e2%8{GH% z5c<}!Df)Y<3SpMRXo(^Oeap)EaF$`r+O&Uv5#k&OZh;<_=s)BF>E1g&0Caw4u+Y0E zIgH~h8j(j2=~;P4^@~F*dIbU>V)-nnIzP)^h!Sz7J3`^t|}-!wf<-atMgx??wfMh{zu7AM~wB_;_*e?mBj4V`Hfz z-l7oL5xi>#c1@4nTNVmGP$N~zM_7MA6pNpqv>{9?J z6o1D(&=%`M_O$CO7pE?N5*c{`y!Eak)@Be1^-V8vg6TzOW}NHFGN6b$?@xDu*zyd! ztuE|z{vNm$FxF@@toKe20_>=l*Y5O?`!6Cz*_uEK-UR`kTvCc&*l7WdFJgI2dj)^9 zzLDG4jKkH@@skP0j%ZyVi6H3ZmX3#3L?egW$5^_zbMk zv#?P3MMRXRa*oe}eC(5k$)*bp{rQ3JJM)oLVbI76j8d5LIk>Tdr04gBjJS9&^L0^2 zwxPHCyv&bEbIU6#UIHo1yF$%n#gvT%0>2~KV)k$e&}rB{^1ol2FJNx3x_o;vC7>4bVvtCybIY^8NYN?RCB!U+?~ueO_cFoQyZW zejy%X0G#Gp^*&DkROJ6`a6|FarvkXr$EeteqY|dIbk|X0cMn0!j1C4=(3a6j{a8`! 
zT@-D;8;ZE|B><%Zi+TTD*SGF?B8z#y_^OC&Haibbl2o9}ZAc_Q{BfcpB~g;(hNefr zSWRZ+`|~Og+zHuVimtA91H;+sN7@Vkh4gmZ_Z_Rj0_wf(y(&cO`Uqia*rdYG?5FMC z2}?_@3nCk04bkrL9_t3AAU;P^x}jq`@N;<#)$5pVpVIRbCQ>x?C#N*HSJzGWt$-yAOR{Nkz_vM5rduVFn=4+l(wsxZM2LHII5zbP__@rZjT@qK z<~FoY&uPCswlUV^_uW6PoHW#`B4IJ|2Q=wM`X{L8X8`8I$y(0d5pkX& zfrxE+{nSVEW_mkq*F~H&+0V%4kWMKF^}*at@=rJ(Rmvwymi`~vTnbCBs< znoOv&cin7q5``cWu%JJj%C;Xy*D_6xQXtJX7w*C(0B8JGr&RJhUEfGGDiL@7e#aqk zI)IB9^*uI&r?i#0o@(5jA6UOCxFtj)DVcWLb7S)?w?UYnpI0@;@M&uj3U2-AfaeaN zW~~t6FpLkPMaf-VYzZd$76%oR%?J*ipC|YC?^o9*nZtTPP^VDh;%(Wt)RR8~8H9L< zg?X%>fI94Ys1pElo@Z^VUQ72i6=HpY!MsI9M^`9vdoL_a5hKOTz(dmXnDLv?A!|)@ z{)DUskT2&n)cV@yR~6`Lzcj)Rt_<|(rTX(!I=|GHbbar?(KwZ(I)2pHq}6^xPH<-< zt7>pC`I}|}=mX{qW@E!cL@?1DhV2ko9->iE7$?rt?nu75w<1Iwuu0o<#|!zbYQSPm z{N@7>znaKz@S~y*qPsd@Rm2(#8IAit|NX^)!`_Vli*nvX1OET8oEK5AjAiW-aS+nX kM-MqX|EGCz69=RX7jnbO$q^lJ=Ol^z17+!K$)_*>1saPm+5i9m diff --git a/code/ui_functions.py b/code/ui_functions.py index 1019600..0a49837 100644 --- a/code/ui_functions.py +++ b/code/ui_functions.py @@ -83,7 +83,14 @@ def uiDefinitions(self): self.ui.frame_plts.hide() self.ui.checkBox_fc.hide() self.ui.checkBox_ttest.hide() - self.dialog.ui.checkBox_bootstrap.setChecked(True) + # "Bootstrap Analysis" and "Collapse Technical Replicates" moved + # from this global plot-config dialog onto their one relevant + # plot's own switcher bar (plot_dendrogram's "Bootstrap" checkbox, + # plot_ordination's "Collapse Replicates" checkbox) -- hide the + # now-orphaned dialog widgets rather than editing the generated + # ui_plotparam.py. + self.dialog.ui.frame_bootstrap.hide() + self.dialog.ui.frame_2.hide() # Top bar functions self.ui.btn_maximize.clicked.connect(lambda: UIFunctions.maximize_restore(self)) diff --git a/devnotes.md b/devnotes.md index 9b496b7..c0c842d 100644 --- a/devnotes.md +++ b/devnotes.md @@ -221,6 +221,14 @@ only handles the combo boxes, axes, and pick events. 
instead (`test_ordination.py`'s synthetic-replicate-structure test, cross- checked against real example data with a scratch script during development). +- **The checkbox itself later moved off the global plot-config dialog**: + `checkBox_collapsereps` only ever affected this one plot, so it's now + `plot_ordination`'s own "Collapse Replicates" checkbox (`self.collapse_replicates`, + default `True`) in its switcher bar, same move as `plot_dendrogram`'s + "Bootstrap" checkbox (see the dendrogram section). The dialog checkbox's + containing frame (`frame_2`) is hidden at runtime rather than edited out + of generated code. This field was never in `paramfields.CHECKBOX_FIELDS` + (wasn't pickled before either), so no save/load behavior changed. - **Loadings view and high-dimensional data**: thousands of features can't all be drawn legibly, so only the top-25 by loading-vector magnitude are shown by default (`ordination.top_loadings()`). Whichever feature is @@ -316,6 +324,34 @@ substitution pattern as `plot_ordination`'s method/view bar): Both views' purity math is the same Qt-free linkage-traversal logic in `clusterpurity.py`, unit-tested in `tests/test_clusterpurity.py`. +- **Red is "bridge" coloring, not "any impure ancestor"**: an earlier + version colored every impure merge red, including every ancestor above a + single mixing event all the way to the root -- since almost any real + dataset has *some* mixing somewhere, this painted most of the tree's + upper structure red regardless of how localized the actual problem was. + `purity_link_color_func()` now distinguishes three states per merge: pure + (`true_color`/green), a *bridge* -- impure, but at least one child was + itself pure, i.e. this is the specific merge where a different group + first gets bridged in (`false_color`/red), or *neutral* -- impure with + both children already impure, i.e. 
just continuing an already-known mix + further up the tree with no new information (`neutral_color`/black, the + same color as "no coloring" so it visually recedes). Verified with a + hand-built 4-group linkage matrix (deterministic merge order, no + clustering ambiguity) asserting the root of two already-mixed clades is + black, not red, while the two actual group-meeting points are red. +- **Bootstrap is now a per-tab checkbox, not a global one**: the + plot-config dialog's "Bootstrap Analysis" checkbox (`checkBox_bootstrap`) + only ever affected this one plot, so it moved into `plot_dendrogram`'s own + switcher bar (`self.bootstrap`, default `True` -- matching the effective + startup default the old checkbox was forced to in `UIFunctions`, which + differed from its own Designer-set default of `False`). The dialog + checkbox's containing frame (`frame_bootstrap`) is hidden at runtime in + `UIFunctions` rather than edited out of the generated `ui_plotparam.py`. + `('bootstrap', ...)` was also dropped from `paramfields.CHECKBOX_FIELDS`, + so it's no longer saved into `.mpct` files -- consistent with the + dendrogram's other per-tab state (View, Color), none of which persist + across save/load either. + - **Purity is a strict, whole-group check, not "any uniform subset"**: a label only counts as pure if *every* leaf carrying it ends up in one clade before that clade touches a different label — 2 of a Sample's 3 replicates From 5ca9baff7535c8759f5786b4e736cb89098f96c3 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 18:45:43 -0400 Subject: [PATCH 12/16] Fix dendrogram coloring: red = proven non-monophyly (label-set overlap) The previous "bridge vs neutral" heuristic still mis-colored real data: it could mark a high-level merge red just because one side was a single freshly-introduced pure clade, and could miss genuine tangles where two already-impure children share a label without either side being trivially pure. 
purity_link_color_func now classifies each merge by comparing the two children's label sets directly: disjoint sets (no label in common) -> neutral/black, a clean join even if one side is impure from an unrelated tangle further down; overlapping sets -> red, definitive proof some label's leaves are split across this exact merge. Verified against the real dataset's bootstrap dendrogram: only the actual scattered-replicate merges render red, and every higher-level merge joining that region with cleanly-resolved samples stays black. Co-Authored-By: Claude Sonnet 4.6 --- code/clusterpurity.py | 47 +++++++++-------- code/tests/test_clusterpurity.py | 90 +++++++++++++++++--------------- devnotes.md | 51 ++++++++++++------ 3 files changed, 111 insertions(+), 77 deletions(-) diff --git a/code/clusterpurity.py b/code/clusterpurity.py index acf8657..5e8ebee 100644 --- a/code/clusterpurity.py +++ b/code/clusterpurity.py @@ -17,21 +17,28 @@ def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='red', neutral_color='black'): """Build a ``link_color_func`` for ``scipy.cluster.hierarchy.dendrogram``. - Three-way coloring, not just pure-vs-not: + Three-way coloring, classified by comparing the two children's label + sets (not by simply asking "is the merge result impure", which would + paint every ancestor of a single mixing event red all the way to the + root): - - ``true_color`` ("pure"/monophyletic): every leaf under this link + - ``true_color`` ("monophyletic"): the two children's label sets are + identical and contain exactly one label -- every leaf under this link shares one label. - - ``false_color`` ("bridge"): this link is impure, but at least one of - its two children was itself pure (a single leaf counts as trivially - pure) -- this is the *specific* merge where a different label first - gets bridged in, i.e. exactly the "bridge sample"/"two groups meet - here" point. 
- - ``neutral_color``: this link is impure AND both children were already - impure -- i.e. it's just continuing an already-known mix further up - the tree, not new information. Without this third state, every - ancestor of a single bridge point would also render in - ``false_color``, painting most of the upper tree the "bad" color even - though only one merge actually caused it. + - ``false_color`` ("polyphyletic"): the two children's label sets + *overlap* (share at least one label) without being identical-and- + singleton -- this is definitive proof that some label's leaves are + split apart by this exact merge (some of that label is on each side), + i.e. genuinely non-monophyletic, not just "still impure from before". + - ``neutral_color``: the two children's label sets are *disjoint* (no + label in common) -- this merge simply joins two regions that don't + contradict each other; it's a clean bridge even if one or both + children are themselves impure from a *different* label's tangle + further down. This is what keeps a single low-level tangle from + cascading red all the way up the tree: once a tangled label's clade + stops growing (nothing more of that label to fold in), every merge + above it only ever joins disjoint regions, so it reverts to + ``neutral_color``. 
Args: Z: linkage matrix (``scipy.cluster.hierarchy.linkage`` or @@ -47,21 +54,19 @@ def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='red' """ n_leaves = len(leaf_labels) leaf_label_sets = {i: {leaf_labels[i]} for i in range(n_leaves)} - is_pure = {i: True for i in range(n_leaves)} # every leaf is trivially pure colors = {} for i, row in enumerate(Z): a, b = int(row[0]), int(row[1]) node_id = n_leaves + i - merged = leaf_label_sets[a] | leaf_label_sets[b] + set_a, set_b = leaf_label_sets[a], leaf_label_sets[b] + merged = set_a | set_b leaf_label_sets[node_id] = merged - pure = len(merged) == 1 - is_pure[node_id] = pure - if pure: + if len(merged) == 1: colors[node_id] = true_color - elif is_pure[a] or is_pure[b]: - colors[node_id] = false_color - else: + elif set_a.isdisjoint(set_b): colors[node_id] = neutral_color + else: + colors[node_id] = false_color return lambda k: colors.get(k, neutral_color) diff --git a/code/tests/test_clusterpurity.py b/code/tests/test_clusterpurity.py index 7138761..87cc3de 100644 --- a/code/tests/test_clusterpurity.py +++ b/code/tests/test_clusterpurity.py @@ -19,6 +19,30 @@ def _two_clean_groups(): return data, labels +def _scattered_pair_linkage(): + """Hand-built linkage (not derived from real coordinates, so the merge + order is exact and unambiguous) reproducing the real-data pattern that + motivated the overlap-based coloring rule: labels P and Q are each + split across two separate leaves that DON'T merge with each other + first (P0, Q0, Q1, P1 -- interleaved, not P0+P1 then Q0+Q1), so neither + P nor Q is monophyletic -- plus an unrelated label R that cleanly joins + in afterward and should NOT show as part of the tangle. + + Leaves: 0=P, 1=Q, 2=Q, 3=P, 4=R. + Merge order: (1,2)=Q+Q pure; (0, that)=P+{Q} disjoint bridge; + (3, that)=P+{P,Q} overlap -- the actual tangle; (4, that)=R+{P,Q} + disjoint again (R was never part of the P/Q mixing). 
+ """ + Z = np.array([ + [1, 2, 0.1, 2], # node 5: Q+Q -> {Q} (pure) + [0, 5, 1.0, 3], # node 6: P + {Q} -> {P,Q} (disjoint) + [3, 6, 2.0, 4], # node 7: P + {P,Q} -> {P,Q} (overlap!) + [4, 7, 3.0, 5], # node 8: R + {P,Q} -> {P,Q,R} (disjoint) + ]) + labels = ['P', 'Q', 'Q', 'P', 'R'] + return Z, labels + + def test_purity_summary_both_groups_pure(): data, labels = _two_clean_groups() Z = linkage(data, method='ward') @@ -26,66 +50,50 @@ def test_purity_summary_both_groups_pure(): assert (n_pure, n_total) == (2, 2) -def test_purity_link_color_func_root_bridges_two_pure_clades(): +def test_purity_link_color_func_clean_disjoint_groups_stay_neutral_even_at_root(): data, labels = _two_clean_groups() Z = linkage(data, method='ward') n_leaves = len(labels) color_func = purity_link_color_func(Z, labels) # The final merge (root) joins group A's whole clade with group B's - # whole clade -- both children are themselves pure, so this is exactly - # the "bridge" merge (the one and only point the two groups meet) and - # must be the false/bridge color, not the neutral one. + # whole clade -- their label sets are disjoint ({A} vs {B}, no overlap), + # so this is a clean join, not evidence either group is non- + # monophyletic. It must be the neutral color, NOT the polyphyletic one + # -- two cleanly-resolved groups simply existing in the same tree isn't + # itself a problem. root_node_id = n_leaves + len(Z) - 1 - assert color_func(root_node_id) == 'red' + assert color_func(root_node_id) == 'black' # Every internal node strictly below the root is a within-group merge - # for this dataset (each group's 3 points cluster before the cross-group - # merge) -- those links must be the "pure" color. + # for this dataset -- those links must be the monophyletic color. 
for i in range(len(Z) - 1): node_id = n_leaves + i assert color_func(node_id) == 'green' -def test_purity_link_color_func_does_not_cascade_red_up_the_whole_tree(): - # Hand-built linkage (not derived from real coordinates, so the merge - # order is exact and unambiguous): 4 groups of 2 leaves each -- - # A=(0,1), B=(2,3), C=(4,5), D=(6,7). Merge order: each group merges - # with itself first (pure), then A+B bridge, then C+D bridge, then the - # root merges the two already-impure (A+B) and (C+D) clades together. - # Z columns: [child1, child2, distance (unused), count (unused)]. - Z = np.array([ - [0, 1, 0.1, 2], # node 8: A+A (pure) - [2, 3, 0.1, 2], # node 9: B+B (pure) - [8, 9, 5.0, 4], # node 10: A+B (bridge -- both children pure) - [4, 5, 0.1, 2], # node 11: C+C (pure) - [6, 7, 0.1, 2], # node 12: D+D (pure) - [11, 12, 5.0, 4], # node 13: C+D (bridge -- both children pure) - [10, 13, 50.0, 8], # node 14: (A+B)+(C+D) -- root - ]) - labels = ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'] +def test_purity_link_color_func_overlap_is_the_only_red_and_it_does_not_cascade(): + Z, labels = _scattered_pair_linkage() color_func = purity_link_color_func(Z, labels) - assert color_func(8) == 'green' # A+A - assert color_func(9) == 'green' # B+B - assert color_func(10) == 'red' # A+B bridge - assert color_func(11) == 'green' # C+C - assert color_func(12) == 'green' # D+D - assert color_func(13) == 'red' # C+D bridge - # The root combines two ALREADY-impure clades -- no new bridge event, - # so it must NOT also render red (that's the "entire tree turns red" - # behaviour this function is specifically built to avoid). 
- assert color_func(14) == 'black' + assert color_func(5) == 'green' # Q+Q, monophyletic + assert color_func(6) == 'black' # P + {Q}: disjoint, clean bridge + assert color_func(7) == 'red' # P + {P,Q}: OVERLAP -- the actual tangle + # R joining afterward is disjoint from {P,Q} -- R was never part of the + # P/Q mixing, so this must NOT also render red just because it's above + # (contains) the node-7 tangle. This is the specific behaviour this + # rule exists for: a real, low-level tangle must not paint every + # ancestor red all the way to the root. + assert color_func(8) == 'black' def test_purity_link_color_func_custom_colors(): - data, labels = _two_clean_groups() - Z = linkage(data, method='ward') - color_func = purity_link_color_func(Z, labels, true_color='cyan', false_color='grey') - n_leaves = len(labels) - root_node_id = n_leaves + len(Z) - 1 - assert color_func(root_node_id) == 'grey' - assert color_func(n_leaves) == 'cyan' + Z, labels = _scattered_pair_linkage() + color_func = purity_link_color_func(Z, labels, true_color='cyan', false_color='magenta', neutral_color='grey') + assert color_func(5) == 'cyan' + assert color_func(6) == 'grey' + assert color_func(7) == 'magenta' + assert color_func(8) == 'grey' def test_purity_summary_one_mismatched_leaf_breaks_purity_for_its_group(): diff --git a/devnotes.md b/devnotes.md index c0c842d..a521abb 100644 --- a/devnotes.md +++ b/devnotes.md @@ -324,21 +324,42 @@ substitution pattern as `plot_ordination`'s method/view bar): Both views' purity math is the same Qt-free linkage-traversal logic in `clusterpurity.py`, unit-tested in `tests/test_clusterpurity.py`. -- **Red is "bridge" coloring, not "any impure ancestor"**: an earlier - version colored every impure merge red, including every ancestor above a - single mixing event all the way to the root -- since almost any real - dataset has *some* mixing somewhere, this painted most of the tree's - upper structure red regardless of how localized the actual problem was. 
- `purity_link_color_func()` now distinguishes three states per merge: pure - (`true_color`/green), a *bridge* -- impure, but at least one child was - itself pure, i.e. this is the specific merge where a different group - first gets bridged in (`false_color`/red), or *neutral* -- impure with - both children already impure, i.e. just continuing an already-known mix - further up the tree with no new information (`neutral_color`/black, the - same color as "no coloring" so it visually recedes). Verified with a - hand-built 4-group linkage matrix (deterministic merge order, no - clustering ambiguity) asserting the root of two already-mixed clades is - black, not red, while the two actual group-meeting points are red. +- **Red marks proven non-monophyly (overlap), not "any impure merge"**: two + earlier attempts both got this wrong in opposite directions. First, every + impure merge was colored red, including every ancestor above a single + mixing event all the way to the root -- since almost any real dataset has + *some* mixing somewhere, this painted most of the tree's upper structure + red regardless of how localized the problem was. The second attempt + ("impure but at least one child was pure = bridge = red, both children + already impure = neutral") fixed the worst of the cascading but still + mis-colored real data: it could still mark a high-level merge red merely + because one side happened to be a single freshly-introduced pure clade, + *and* it could miss real tangles where two already-impure children share + a label without one side being trivially pure. + + `purity_link_color_func()` now compares the two children's label sets + directly at each merge: + - identical and a single label -> monophyletic (`true_color`/green). + - **disjoint** (no label in common) -> neutral (`neutral_color`/black) -- + a clean join of two regions that don't contradict each other, *even if + one or both children are themselves impure from a different label's + tangle further down*. 
This is what stops a low-level tangle from + cascading: once a tangled label has nothing more of itself left to fold + in, every merge above it only ever joins disjoint regions, so it goes + back to black. + - **overlap** (share >=1 label, without being identical-and-singleton) -> + polyphyletic (`false_color`/red) -- definitive proof that some label's + leaves are split across this exact merge (present on both sides), not + just "still mixed from an earlier merge". + + Verified against the real example dataset's bootstrap dendrogram (the + case that exposed both earlier bugs): only the two merges that actually + re-unite a scattered sample's replicates (e.g. one sample's reps split + into two non-sister sub-clades that only meet again higher up) render + red; the higher-level merges joining that region with cleanly-resolved, + unrelated samples stay black, same as a hand-built synthetic linkage + (`tests/test_clusterpurity.py`'s `_scattered_pair_linkage`) reproducing + the same pattern deterministically. - **Bootstrap is now a per-tab checkbox, not a global one**: the plot-config dialog's "Bootstrap Analysis" checkbox (`checkBox_bootstrap`) only ever affected this one plot, so it moved into `plot_dendrogram`'s own From 66b3e0240a8a9d1aadda6fc237315cc55b23d713 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 19:04:35 -0400 Subject: [PATCH 13/16] Dendrogram: add Use Sample/Group Names labels; fix AU/BP label scaling - ordination.replicate_label_components() numbers each Injection's biological and technical replicate rank (BioRep#/TechRep#) within its Biolgroup/Sample, unconditionally (works fine when either count is 1). A new "Use Sample/Group Names" checkbox in the dendrogram's switcher bar swaps the raw file/injection names for _b_s (or _b alone in the Biological Replicates view) -- useful when the real file names are long or uninformative. 
- pvclust.plot_dendrogram's AU/BP annotations used a fixed icoord-unit x-shift that shrank to an ever-smaller pixel gap as leaf count grew (icoord-to-pixel ratio shrinks with more leaves in the same plot width), eventually merging "AU"/"BP" into illegible overlapping text. Fixed with ax.annotate(..., textcoords='offset points', ha='right'/'left'), which keeps a constant pixel gap regardless of leaf count, plus leaf-count-scaled fontsize. Also removed a plt.figure()/plt.tight_layout() pair that created and abandoned an unused Figure on every redraw. Co-Authored-By: Claude Sonnet 4.6 --- code/ordination.py | 47 +++++++++++++++++++++++++++++ code/plotting.py | 36 ++++++++++++++++++++-- code/pvclust.py | 56 +++++++++++++++++++++++------------ code/tests/test_ordination.py | 40 +++++++++++++++++++++++-- devnotes.md | 44 +++++++++++++++++++++++++-- 5 files changed, 198 insertions(+), 25 deletions(-) diff --git a/code/ordination.py b/code/ordination.py index c941453..5356010 100644 --- a/code/ordination.py +++ b/code/ordination.py @@ -100,6 +100,53 @@ def load_ordination_matrix(file, raw_msdata_header, collapse_replicates): return x, biolgroup +def replicate_label_components(raw_msdata_header): + """Derive (Biolgroup, BioRep#, TechRep#) for every Injection, for + building short ``biolgroupname_b_s``-style display + labels as an alternative to raw (sometimes long/uninformative) file + names -- used by the dendrogram tab's "Use Sample/Group Names" toggle. + + BioRep# is the 1-based rank of an Injection's Sample among all distinct + Samples sharing the same Biolgroup (first-seen order in the header); + TechRep# is the 1-based rank of the Injection among all Injections + sharing the same Sample (first-seen order). Both are always assigned + starting at 1, so a Biolgroup with only one Sample still gets "_b1", and + a Sample with only one Injection still gets "_s1" -- there's no minimum- + replicate-count special case to handle. 
+ + Args: + raw_msdata_header: the peak table's 3 header rows, read raw + (``header=None, index_col=[0,1,2]).iloc[:3,:].transpose()``) -- + same format ``load_ordination_matrix`` takes. + + Returns: + DataFrame indexed by Injection, columns ``['Biolgroup', 'Sample', + 'BioRep', 'TechRep']`` (``BioRep``/``TechRep`` are 1-based ints). + """ + header = raw_msdata_header.copy() + header.columns = ['Biolgroup', 'Sample', 'Injection'] + + samples_seen_per_biolgroup = {} + biorep_of_sample = {} + for _, row in header.drop_duplicates('Sample').iterrows(): + biolgroup, sample = row['Biolgroup'], row['Sample'] + samples_seen_per_biolgroup.setdefault(biolgroup, []) + samples_seen_per_biolgroup[biolgroup].append(sample) + biorep_of_sample[sample] = len(samples_seen_per_biolgroup[biolgroup]) + + injections_seen_per_sample = {} + techrep_of_injection = {} + for _, row in header.iterrows(): + sample, injection = row['Sample'], row['Injection'] + injections_seen_per_sample.setdefault(sample, []) + injections_seen_per_sample[sample].append(injection) + techrep_of_injection[injection] = len(injections_seen_per_sample[sample]) + + header['BioRep'] = header['Sample'].map(biorep_of_sample) + header['TechRep'] = header['Injection'].map(techrep_of_injection) + return header.set_index('Injection')[['Biolgroup', 'Sample', 'BioRep', 'TechRep']] + + def autoscale(x): """Mean-center and scale each feature to unit variance ("UV-scaling" / "autoscaling" in chemometrics terminology -- the standard pre-treatment diff --git a/code/plotting.py b/code/plotting.py index 1467cd0..80133c9 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -826,6 +826,13 @@ class plot_dendrogram(ui_plot): switcher bar (formerly the plot-config dialog's global "Bootstrap Analysis" checkbox -- moved here since it only ever applied to this plot). The purity-coloring math lives in the Qt-free clusterpurity.py. 
+ + A "Use Sample/Group Names" checkbox swaps the leaf labels from the raw + file/injection names (which can be long or uninformative) to + ``<Biolgroup>_b<BioRep#>_s<TechRep#>`` (Technical Replicates view) or + ``<Biolgroup>_b<BioRep#>`` (Biological Replicates view, no TechRep# + since replicates are already collapsed) -- see + ``ordination.replicate_label_components()``. """ VIEWS = ('Technical Replicates', 'Biological Replicates') @@ -844,6 +851,7 @@ def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): self.view = 'Technical Replicates' self.color_mode = 'Purity' self.bootstrap = True + self.use_sample_names = False self._build_switcher_bar(parent, currplt) self.plot(parent, file, filtereddfs, groupsets) @@ -872,11 +880,17 @@ def _build_switcher_bar(self, parent, currplt): bootstrap_check.setChecked(self.bootstrap) bootstrap_check.toggled.connect(self._on_bootstrap_toggled) layout.addWidget(bootstrap_check) + + use_names_check = QtWidgets.QCheckBox('Use Sample/Group Names') + use_names_check.setChecked(self.use_sample_names) + use_names_check.toggled.connect(self._on_use_sample_names_toggled) + layout.addWidget(use_names_check) layout.addStretch() self.view_combo = view_combo self.color_combo = color_combo self.bootstrap_check = bootstrap_check + self.use_names_check = use_names_check parent.pltlayout[currplt].insertWidget(0, bar) def _on_view_changed(self, view): @@ -891,6 +905,22 @@ def _on_bootstrap_toggled(self, checked): self.bootstrap = checked self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + def _on_use_sample_names_toggled(self, checked): + self.use_sample_names = checked + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + + def _display_labels(self, raw_header, textlabels): + """Build short ``Biolgroup_b#[_s#]`` leaf labels in place of the raw + file/injection names, when "Use Sample/Group Names" is checked.""" + components = ordination.replicate_label_components(raw_header) + if self.view == 'Biological Replicates': + per_sample
= components.drop_duplicates('Sample').set_index('Sample') + return [f"{per_sample.loc[sample, 'Biolgroup']}_b{per_sample.loc[sample, 'BioRep']}" for sample in textlabels] + return [ + f"{components.loc[injection, 'Biolgroup']}_b{components.loc[injection, 'BioRep']}_s{components.loc[injection, 'TechRep']}" + for injection in textlabels + ] + def plot(self, parent, file, filtereddfs, groupsets): self._last_file = file self._last_filtereddfs = filtereddfs @@ -945,10 +975,12 @@ def plot(self, parent, file, filtereddfs, groupsets): else: link_color_func = None # plain black dendrogram, scipy's own default + display_labels = self._display_labels(raw_header, textlabels) if self.use_sample_names else textlabels + if self.bootstrap: - dend = pv.plot(parent.ax[self.currplt], labels=textlabels, link_color_func=link_color_func) + dend = pv.plot(parent.ax[self.currplt], labels=display_labels, link_color_func=link_color_func) else: - dend = shc.dendrogram(Z, ax=parent.ax[self.currplt], leaf_rotation=90, color_threshold=0, above_threshold_color='black', link_color_func=link_color_func, labels=textlabels) # default leaf label size 16 + dend = shc.dendrogram(Z, ax=parent.ax[self.currplt], leaf_rotation=90, color_threshold=0, above_threshold_color='black', link_color_func=link_color_func, labels=display_labels) # default leaf label size 16 if self.color_mode == 'Purity': n_pure, n_total = clusterpurity.purity_summary(Z, leaf_labels) diff --git a/code/pvclust.py b/code/pvclust.py index 1efc2dd..6943b83 100644 --- a/code/pvclust.py +++ b/code/pvclust.py @@ -282,9 +282,6 @@ def plot_dendrogram(linkage_matrix, pvalues, axis, labels=None, link_color_func= y = {i: j[1] for i, j in enumerate(ycoord)} pos = node_positions(y, x) - - plt.figure(figsize=(12, 8)) - plt.tight_layout() set_link_color_palette(['c', 'g']) # link_color_func, when given, takes priority over color_threshold/ # above_threshold_color (scipy's own precedence rule) -- that's how the @@ -295,25 +292,46 @@ def 
plot_dendrogram(linkage_matrix, pvalues, axis, labels=None, link_color_func= link_color_func=link_color_func) maxval = max(y.values()) ax = axis - for node, (x, y) in pos.items(): #modifications added to scale y axis label shifts + + # AU/BP labels used to be positioned with a fixed x-shift in icoord + # units (e.g. "x-7"). icoord spacing is always 10 units per leaf + # regardless of leaf count, but the AXES' actual pixel width is not -- + # with more leaves squeezed into the same plot width, each icoord unit + # maps to fewer pixels, so a fixed icoord offset shrinks to a fixed + # *fraction* of leaf spacing but an ever-shrinking number of *pixels*, + # eventually overlapping (e.g. "AU"/"BP" merging into "AUBP" once there + # are enough leaves). Anchoring with ha='right'/'left' plus a constant + # offset in POINTS (not data units) keeps a fixed pixel gap regardless + # of leaf count or icoord scale -- no more digit-width-dependent x-shift + # hack for AU values of 100 vs not. Per-node fontsize is similarly + # scaled down as leaf count grows, so neighbouring nodes' labels (which + # do have a fixed minimum icoord-and-therefore-pixel separation) don't + # run into each other either. 
+ n_leaves = len(d['ivl']) + value_fontsize = max(5, min(8, 140 / n_leaves)) + header_fontsize = value_fontsize + 3 + gap_points = 2 + + for node, (nx, ny) in pos.items(): #modifications added to scale y axis label shifts + y_offset = ny + maxval / 200 if node == (len(pos.items())-1): - ax.text(x-6, y+maxval/200, 'AU', fontsize=11, fontweight='bold', - color='purple') - ax.text(x+1, y+maxval/200, 'BP', fontsize=11, fontweight='bold', - color='black') + ax.annotate('AU', xy=(nx, y_offset), xytext=(-gap_points, 0), + textcoords='offset points', ha='right', va='bottom', + fontsize=header_fontsize, fontweight='bold', color='purple') + ax.annotate('BP', xy=(nx, y_offset), xytext=(gap_points, 0), + textcoords='offset points', ha='left', va='bottom', + fontsize=header_fontsize, fontweight='bold', color='black') else: - if pvalues[node][0]*100 == 100: - ax.text(x-10, y+maxval/200, f' {pvalues[node][0]*100:.0f}', fontsize=8, - color='purple', fontweight='bold') - ax.text(x+1, y+maxval/200, f'{pvalues[node][1]*100:.0f}', fontsize=8, - color='black', fontweight='bold') - else: - ax.text(x-7, y+maxval/200, f' {pvalues[node][0]*100:.0f}', fontsize=8, - color='purple') - ax.text(x+1, y+maxval/200, f'{pvalues[node][1]*100:.0f}', fontsize=8, - color='black') -# plt.savefig('dendrogram.pdf') + au_significant = pvalues[node][0] * 100 == 100 + ax.annotate(f'{pvalues[node][0]*100:.0f}', xy=(nx, y_offset), xytext=(-gap_points, 0), + textcoords='offset points', ha='right', va='bottom', + fontsize=value_fontsize, color='purple', + fontweight='bold' if au_significant else 'normal') + ax.annotate(f'{pvalues[node][1]*100:.0f}', xy=(nx, y_offset), xytext=(gap_points, 0), + textcoords='offset points', ha='left', va='bottom', + fontsize=value_fontsize, color='black', + fontweight='bold' if au_significant else 'normal') def node_positions(x, y): diff --git a/code/tests/test_ordination.py b/code/tests/test_ordination.py index 1813600..f8dfc48 100644 --- a/code/tests/test_ordination.py +++ 
b/code/tests/test_ordination.py @@ -12,8 +12,8 @@ import pytest from ordination import ( - load_ordination_matrix, nmds_loading_proxy, run_nmds, run_pca, run_plsda, - top_loadings, + load_ordination_matrix, nmds_loading_proxy, replicate_label_components, + run_nmds, run_pca, run_plsda, top_loadings, ) @@ -76,6 +76,42 @@ def test_collapsed_values_are_the_mean_of_their_technical_replicates(tmp_path, m assert s1_row['feat1'].iloc[0] == pytest.approx(11.0) +# --------------------------------------------------------------------------- # +# replicate_label_components +# --------------------------------------------------------------------------- # + +def test_replicate_label_components_numbers_bio_and_tech_reps(tmp_path): + # Reuses the same fixture as the collapse tests: groupA has 2 Samples + # (S1, S1b -- BioRep 1 and 2), groupB has 1 Sample (S2 -- BioRep 1, the + # "only one biological replicate" edge case), every Sample has 3 + # Injections (TechRep 1-3). + path = tmp_path / 'example_filtered.csv' + _write_synthetic_filtered_csv(path) + components = replicate_label_components(_raw_header(path)) + + assert components.loc['inj1', ['Biolgroup', 'Sample', 'BioRep', 'TechRep']].tolist() == ['groupA', 'S1', 1, 1] + assert components.loc['inj3', ['Biolgroup', 'Sample', 'BioRep', 'TechRep']].tolist() == ['groupA', 'S1', 1, 3] + assert components.loc['inj4', ['Biolgroup', 'Sample', 'BioRep', 'TechRep']].tolist() == ['groupA', 'S1b', 2, 1] + # groupB has only one Sample -- still BioRep 1, not skipped/blank. + assert components.loc['inj7', ['Biolgroup', 'Sample', 'BioRep', 'TechRep']].tolist() == ['groupB', 'S2', 1, 1] + + +def test_replicate_label_components_single_technical_replicate(tmp_path): + # Edge case: a Sample with only one Injection should still get + # TechRep=1, not be skipped or raise. 
+ path = tmp_path / 'single_techrep_filtered.csv' + with open(path, 'w') as f: + f.write(',,,groupA,groupA,groupB\n') + f.write(',,,S1,S1b,S2\n') + f.write('Compound,m/z,Retention time (min),inj1,inj2,inj3\n') + f.write('feat1,100.0,1.0,10,30,50\n') + components = replicate_label_components(_raw_header(path)) + + assert components.loc['inj1', ['Sample', 'BioRep', 'TechRep']].tolist() == ['S1', 1, 1] + assert components.loc['inj2', ['Sample', 'BioRep', 'TechRep']].tolist() == ['S1b', 2, 1] + assert components.loc['inj3', ['Sample', 'BioRep', 'TechRep']].tolist() == ['S2', 1, 1] + + # --------------------------------------------------------------------------- # # run_pca / run_nmds / run_plsda / top_loadings # --------------------------------------------------------------------------- # diff --git a/devnotes.md b/devnotes.md index a521abb..ece3cf0 100644 --- a/devnotes.md +++ b/devnotes.md @@ -291,8 +291,10 @@ only handles the combo boxes, axes, and pick events. ## Dendrogram purity coloring (`plotting.plot_dendrogram`, `clusterpurity.py`) -The dendrogram tab has two combo-box switchers (same runtime-widget- -substitution pattern as `plot_ordination`'s method/view bar): +The dendrogram tab has a switcher bar (same runtime-widget-substitution +pattern as `plot_ordination`'s method/view bar) with two combo boxes (View, +Color) and two checkboxes (Bootstrap, Use Sample/Group Names -- both +documented further down, formerly/newly local to this tab respectively): - **View** — which leaves to cluster: - **Technical Replicates** (default — matches the tab's original @@ -408,6 +410,44 @@ Both views' purity math is the same Qt-free linkage-traversal logic in `freeze_support()` specifically). The real app is fine — `main.py` already guards its entry point — but throwaway test scripts need the same discipline. 
+- **"Use Sample/Group Names" leaf labels** (`ordination.replicate_label_components()`): + swaps the raw file/injection names for `<Biolgroup>_b<BioRep#>_s<TechRep#>` + (Technical Replicates view) or `<Biolgroup>_b<BioRep#>` (Biological + Replicates view -- no TechRep#, since that view already collapsed + technical replicates), for when the real file names are long or + uninformative. BioRep# is the 1-based rank of a Sample within its + Biolgroup (first-seen order); TechRep# is the 1-based rank of an + Injection within its Sample -- both numbers are assigned unconditionally, + so a Biolgroup with only one Sample still shows `_b1` and a Sample with + only one Injection still shows `_s1` (no special-casing needed for either + edge case, verified in `test_ordination.py`). This only changes the + `labels=` argument passed to `dendrogram()`/`PvClust.plot()` -- the + underlying data orientation, clustering, and purity-coloring lookups all + still key off the raw names internally. +- **AU/BP label scaling, regardless of leaf count**: the bootstrap + dendrogram's per-node AU/BP annotations used to be positioned with a + fixed x-shift in *icoord* units (e.g. `x-7`, with a separate `x-10` for + 3-digit "100" values). icoord spacing is always 10 units per leaf no + matter how many leaves there are, but the axes' actual pixel width isn't + -- with more leaves squeezed into the same plot width, each icoord unit + maps to fewer and fewer pixels, so that fixed icoord offset shrinks to an + ever-smaller *pixel* gap, eventually merging "AU"/"BP" into "AUBP" (and + every node's AU/BP pair into illegible overlapping text) once there are + enough leaves. Fixed by switching to `ax.annotate(..., xytext=(±2, 0), + textcoords='offset points', ha='right'/'left')`: a constant gap in + *points* (real pixels-at-a-given-DPI) stays a constant gap regardless of + icoord scale or leaf count, and `ha='right'`/`ha='left'` anchoring makes + the old digit-width-dependent branching (-7 vs -10) unnecessary entirely.
+ Per-node fontsize is also now scaled down as leaf count grows + (`max(5, min(8, 140 / n_leaves))`) so neighbouring *different* nodes' + labels -- which do have a fixed minimum icoord (and therefore pixel) + separation -- don't run into each other either. Verified by rendering + both a 6-leaf and a 27-leaf synthetic tree (matching the real 9-sample + x3-techrep dataset's leaf count) and visually confirming no overlap in + either. Also removed a `plt.figure(figsize=(12,8))`/`plt.tight_layout()` + pair that created and immediately abandoned an unused Figure on every + call -- it never affected the actual target `axis` and was a real (if + small) per-redraw resource leak. ## Treemap / upset plot canvases (`plotting.plot_treemap`, `plotting.plot_upset`) From 9697aa38e4147d1d4cfd4ee776e6be72a5737c21 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Mon, 29 Jun 2026 19:23:33 -0400 Subject: [PATCH 14/16] Docs: update mkdocs guide for ordination rework and dendrogram improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - multivariate.md: rewrite from NMDS-only to cover the full PCA/NMDS/PLS-DA ordination tab — method switcher, scores/loadings view, collapse-replicates checkbox, stress metric for NMDS, %explained for PCA/PLS-DA. - group-analysis.md: document all four dendrogram switcher-bar controls (View, Color, Bootstrap, Use Sample/Group Names). - changelog.md: add 2026 entries for ordination rework, dendrogram purity coloring/switchers/label options, AU/BP annotation fix, and canvas-plot UpSet/treemap. - development.md: add ordination.py, clusterpurity.py, csvcache.py to the hand-written-code list; add ordination to the test-coverage list. - index.md: expand the "mid-2026 updates" note to mention ordination and dendrogram reworks. 
Co-Authored-By: Claude Sonnet 4.6 --- docs/changelog.md | 27 ++++++++++++++++-- docs/development.md | 9 ++++-- docs/index.md | 12 ++++---- docs/plots/group-analysis.md | 51 +++++++++++++++++++++++++++++---- docs/plots/multivariate.md | 55 +++++++++++++++++++++++++++--------- 5 files changed, 125 insertions(+), 29 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index b58901b..c9475ef 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -9,8 +9,31 @@ Metaboscape) and automatic GNPS2-compatible re-indexing of MSP/MGF fragment databases on export. - MVC refactor of "Plot Feature Sets" (`groupsets.py`). -- `fastcluster` optional acceleration for hierarchical clustering/bootstrap - dendrogram. +- Multivariate ordination rework: the former NMDS-only tab now supports + **PCA**, **NMDS**, and **PLS-DA** via a method switcher bar, plus a + scores/loadings view toggle and a loadings highlight synced to feature + selection elsewhere in the app. PCA/PLS-DA features are autoscaled + (mean-center + unit variance) before fitting; NMDS stays on raw + abundances (conventional for Bray-Curtis). A new Qt-free backend + (`ordination.py`) handles all ordination math and is covered by + headless unit tests. +- Dendrogram rework: + - Per-plot switcher bar for **View** (Technical vs Biological + Replicates), **Color** (Purity / None), **Bootstrap**, and + **Use Sample/Group Names**. + - Purity coloring: green branches are cleanly within-sample/group; + red branches mark the exact merge point where two groups' leaves + overlap (proven non-monophyly). A title reports how many + samples/groups are fully correctly clustered. + - "Use Sample/Group Names" checkbox: replaces raw injection/file names + with `<Biolgroup>_b<#>_s<#>` (Technical Replicates view) or + `<Biolgroup>_b<#>` (Biological Replicates view). + - Bootstrap can be toggled off for a faster undecorated dendrogram.
+ - Fixed AU/BP annotation alignment: labels now stay a constant pixel + gap apart regardless of leaf count, with leaf-count-scaled font size. + - `fastcluster` optional acceleration for bootstrap linkage. +- UpSet and treemap plots are now rendered directly on a Qt canvas + (replacing a PNG round-trip). - Headless unit test suite (`code/tests/`). - Hardened dependency installer to prevent NumPy 2.x environment breaks. - Numerous bugfixes: Spearman double-colorbar on re-run, highlight not diff --git a/docs/development.md b/docs/development.md index 246d38e..0879d35 100644 --- a/docs/development.md +++ b/docs/development.md @@ -15,6 +15,9 @@ in the repo root. - **Hand-written app code:** `main.py` (`MainWindow`, run/save/load, database search), `plotting.py` (plot classes), `filter.py`, `stats.py`, `MSFaST.py` (analysis driver), `pvclust.py` (bootstrap dendrogram), + `ordination.py` (Qt-free PCA/NMDS/PLS-DA backend), + `clusterpurity.py` (dendrogram branch-purity logic), + `csvcache.py` (cached CSV reads for the ordination data path), `translators.py` (import/export framework), `mzmineimport.py` (format conversion), `getfragdb.py`, `mspwriter.py`. - **Canonical peak table** format (what `MSFaST` consumes internally; @@ -59,9 +62,9 @@ Headless unit tests live in `code/tests/` (pure-logic only — no Qt): python -m pytest code/tests -q ``` -Covers `filter`, `stats`, `importdependencies`, `translators`, and -`groupsets`. Add tests here for any new Qt-free logic. GUI behaviour can't -be tested headlessly — verify it by running the app. +Covers `filter`, `stats`, `importdependencies`, `translators`, +`groupsets`, and `ordination`. Add tests here for any new Qt-free logic. +GUI behaviour can't be tested headlessly — verify it by running the app. ## Conventions diff --git a/docs/index.md b/docs/index.md index 7d36644..f61cc62 100644 --- a/docs/index.md +++ b/docs/index.md @@ -25,8 +25,10 @@ shows. 
If you're contributing to MPACT itself, see [Development](development.md) This documentation is adapted from the original MPACT user guide (2022) and updated to reflect the current codebase (mid-2026), including the import/export translator framework for MZmine/MS-DIAL/ - Metaboscape peak tables, the background-threaded analysis run, and the - groupset (Plot Feature Sets) editor. Some screenshots referenced in - the original guide have not been re-captured yet — see - [Development](development.md) if you'd like to contribute updated - images. + Metaboscape peak tables, the background-threaded analysis run, the + groupset (Plot Feature Sets) editor, the multivariate ordination + rework (PCA/NMDS/PLS-DA with scores and loadings views), and the + dendrogram rework (purity coloring, view/bootstrap/label switchers). + Some screenshots referenced in the original guide have not been + re-captured yet — see [Development](development.md) if you'd like to + contribute updated images. diff --git a/docs/plots/group-analysis.md b/docs/plots/group-analysis.md index f41eeb9..732a98b 100644 --- a/docs/plots/group-analysis.md +++ b/docs/plots/group-analysis.md @@ -31,12 +31,51 @@ For datasets where biological/treatment differences should exceed technical noise, technical replicates should cluster together after filtering. -Bootstrap analysis (1000 iterations) can be enabled in the plot options -dialog to annotate the dendrogram with approximately-unbiased (AU) p-values -and bootstrap probabilities (BP). AU values above 95 are generally -considered statistically significant. Bootstrap computation uses -`fastcluster` if it's installed (falling back to SciPy's hierarchical -clustering otherwise) for substantially faster linkage on large datasets. 
+A settings bar above the plot controls how it's drawn — all four options +are local to this tab (not the general plot options dialog) and redraw +immediately when changed: + +**View** — which leaves to cluster: + +- **Technical Replicates** (default): every injection is its own leaf, + letting you see whether each sample's individual injections agree with + each other. +- **Biological Replicates**: technical replicates are averaged together + first (one leaf per sample), so the plot reflects clustering of + biological/treatment groups without technical noise. + +**Color** — how branches are colored: + +- **Purity** (default): a branch is colored **green** if every leaf beneath + it belongs to the same sample (Technical Replicates view) or the same + treatment group (Biological Replicates view) — i.e. it's correctly, + unambiguously clustered. A branch is colored **red** if it's the specific + point where two different samples/groups' leaves are proven to overlap + (some of that sample's/group's replicates are on each side of the + split) — a real sign of poor clustering, not just "still mixed from + somewhere lower in the tree." Every other branch (a clean join of two + unrelated, already-resolved regions) stays black, even if it sits above a + red branch elsewhere in the tree — so a single tangled sample doesn't + paint the whole tree red. The plot title reports how many + samples/groups are *fully* correctly clustered (e.g. "7/9 samples' + replicates clustered together"). +- **None**: a plain, uncolored dendrogram with no title — useful if you + just want the clustering shape without the QC overlay. + +**Bootstrap** — when checked (default on), runs bootstrap resampling +(1000 iterations) and annotates the dendrogram with approximately-unbiased +(AU) p-values and bootstrap probabilities (BP) at each branch point. AU +values above 95 are generally considered statistically significant. 
+Bootstrap computation uses `fastcluster` if it's installed (falling back to +SciPy's hierarchical clustering otherwise) for substantially faster linkage +on large datasets. Uncheck for a faster, unannotated dendrogram. + +**Use Sample/Group Names** — when checked, leaf labels switch from the raw +injection/file names (which can be long or uninformative) to +`<Biolgroup>_b<#>_s<#>` (Technical Replicates view) or `<Biolgroup>_b<#>` +(Biological Replicates view), where `b<#>` numbers each biological +replicate (sample) within its group and `s<#>` numbers each technical +replicate (injection) within its sample. ![Dendrogram](../images/dendrogram.png) *MPACT dendrogram after filtering, showing correct clustering of most diff --git a/docs/plots/multivariate.md b/docs/plots/multivariate.md index 30aa180..013e37f 100644 --- a/docs/plots/multivariate.md +++ b/docs/plots/multivariate.md @@ -1,17 +1,46 @@ -# Multivariate Analysis (NMDS) +# Multivariate Analysis -Nonmetric multidimensional scaling (NMDS) reduces the high-dimensional -metabolomics dataset to a low-dimensional view that's easy to interpret -visually. Samples are plotted by biological group, with 95% confidence -ellipses shown per group — samples that are closer together are more -similar in overall metabolome. +Reduces the high-dimensional metabolomics dataset to a low-dimensional +view. Three ordination methods are available: **NMDS**, **PCA**, and +**PLS-DA** — all operating on the same samples × features intensity +matrix. -By default, technical replicates are averaged so NMDS runs at the -sample level. To instead evaluate clustering of individual technical -replicates, uncheck replicate averaging in the plot options dialog -(the small square button in the plot's toolbar). +A settings bar above the plot controls how it's drawn, and the plot +redraws immediately on any change: + +**Method** — which ordination to run: + +- **NMDS** (default): nonmetric multidimensional scaling using + Bray-Curtis dissimilarity. 
A rank-based embedding — samples that are + closer together are more similar in overall metabolome. The plot + title shows the NMDS stress (the conventional fit-quality metric); + axis labels are NMDS1/NMDS2 (no percent-explained, since NMDS is + not a linear decomposition of the feature space). +- **PCA**: principal component analysis on mean-centered, + unit-variance-scaled features. Axis labels show percent of + total variance explained by each component. +- **PLS-DA**: partial least-squares discriminant analysis, supervised + by biological group. Axis labels show percent of explained variance + per component. Useful when NMDS/PCA show overlapping groups but + there's a genuine biological difference you want to maximize the + separation of. + +**View** — which aspect of the ordination to plot: + +- **Scores** (default): each sample (or injection) as a point, + coloured by biological group, with 95% confidence ellipses per + group. +- **Loadings**: the top 25 features that most drive the separation, + shown as arrows from the origin. Selecting a feature in another + plot highlights it here (yellow marker at its loadings position). + +**Collapse Replicates** — when checked (default on), technical +replicates are averaged together before running ordination, so each +point represents one biological sample. Uncheck to treat every +individual injection as its own point, which can be useful for +diagnosing injection-level outliers. 
![NMDS plot](../images/nmds-plot.png) -*MPACT NMDS plot with technical-replicate averaging, showing differences -between samples and biological groups, with shaded ovals denoting 95% -confidence intervals.* +*MPACT multivariate ordination (NMDS scores view) with technical-replicate +averaging, showing differences between samples and biological groups, +with shaded ovals denoting 95% confidence intervals.* From 52c1d37488618acdf32b0622f563d277ab178e09 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Tue, 30 Jun 2026 01:21:42 -0400 Subject: [PATCH 15/16] correlation matrix control improvements --- code/clusterpurity.py | 17 ++- code/ordination.py | 33 +++++ code/plotting.py | 206 +++++++++++++++++++++------ code/tests/test_clusterpurity.py | 16 +-- code/tests/test_ordination.py | 46 +++++- code/ui_functions.py | 13 +- devnotes.md | 113 ++++++++++++--- docs/changelog.md | 12 +- docs/development.md | 122 ---------------- docs/index.md | 9 +- docs/plots/group-analysis.md | 73 +++++++--- docs/troubleshooting.md | 4 +- docs/user-guide/analysis-settings.md | 6 +- mkdocs.yml | 1 - 14 files changed, 444 insertions(+), 227 deletions(-) delete mode 100644 docs/development.md diff --git a/code/clusterpurity.py b/code/clusterpurity.py index 5e8ebee..a6971df 100644 --- a/code/clusterpurity.py +++ b/code/clusterpurity.py @@ -10,17 +10,22 @@ cluster tightly together, and separately whether biological replicates of one Biolgroup are well separated from other groups. +Default colors are green/magenta rather than the more conventional +green/red -- red-green colorblindness (the most common form) makes the two +indistinguishable; magenta stays distinguishable from green under all +common forms of color vision deficiency. + This module is Qt-free and unit-tested (see ``tests/test_clusterpurity.py``). 
""" -def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='red', neutral_color='black'): +def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='magenta', neutral_color='black'): """Build a ``link_color_func`` for ``scipy.cluster.hierarchy.dendrogram``. Three-way coloring, classified by comparing the two children's label sets (not by simply asking "is the merge result impure", which would - paint every ancestor of a single mixing event red all the way to the - root): + paint every ancestor of a single mixing event false_color all the way to + the root): - ``true_color`` ("monophyletic"): the two children's label sets are identical and contain exactly one label -- every leaf under this link @@ -35,9 +40,9 @@ def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='red' contradict each other; it's a clean bridge even if one or both children are themselves impure from a *different* label's tangle further down. This is what keeps a single low-level tangle from - cascading red all the way up the tree: once a tangled label's clade - stops growing (nothing more of that label to fold in), every merge - above it only ever joins disjoint regions, so it reverts to + cascading false_color all the way up the tree: once a tangled label's + clade stops growing (nothing more of that label to fold in), every + merge above it only ever joins disjoint regions, so it reverts to ``neutral_color``. Args: diff --git a/code/ordination.py b/code/ordination.py index 5356010..7829e9c 100644 --- a/code/ordination.py +++ b/code/ordination.py @@ -241,6 +241,39 @@ def nmds_loading_proxy(x, scores): ) +def similarity_matrix(x, method): + """Pairwise similarity between samples (rows of ``x``, a samples x + features intensity matrix) -- backs the sample-correlation heatmap's + "Method" switcher. + + - ``'Spearman'``: rank correlation of abundance profiles. 
The + established default for metabolomics QC (robust to the non-normal, + heavy-tailed abundance distributions typical of LC-MS data); values + in [-1, 1]. + - ``'Jaccard'``: 1 - Jaccard distance on which features are detected + (abundance > 0) in each sample, ignoring relative abundance + entirely -- useful when what matters is which compounds were + detected at all rather than how much; values in [0, 1]. + - ``'Bray-Curtis'``: 1 - Bray-Curtis dissimilarity, the standard + abundance-weighted similarity measure in ecology/metabolomics, + computed on raw abundances (same convention as ``run_nmds``'s + dissimilarity, unlike PCA/PLS-DA's autoscaled features); values in + [0, 1]. + + Returns a samples x samples DataFrame. + """ + if method == 'Spearman': + return x.transpose().corr(method='spearman') + x_filled = x.fillna(0) + if method == 'Jaccard': + dist = pairwise_distances((x_filled > 0).values, metric='jaccard') + elif method == 'Bray-Curtis': + dist = pairwise_distances(x_filled.values, metric='braycurtis') + else: + raise ValueError(f'Unknown similarity method: {method!r}') + return pd.DataFrame(1 - dist, index=x.index, columns=x.index) + + def run_plsda(x, y, n_components): """PLS-DA: PLS regression of the samples x features matrix against one-hot-encoded group labels. diff --git a/code/plotting.py b/code/plotting.py index 80133c9..137f2a5 100644 --- a/code/plotting.py +++ b/code/plotting.py @@ -579,57 +579,169 @@ def plot(self, parent, file, filtereddfs, groupsets): # abundance tied opacity u class plot_samplecorr(ui_plot): """ - The plot_samplecorr class generates a heatmap plot of the Spearman or Pearson correlation between samples. 
- - Parameters: - - parent: the parent widget for the plot - currplt: the index of the current plot within the parent widget - frame: the parent frame for the plot - file: a path to the file containing the ion dictionary - filtereddfs: a dictionary containing filtered dataframes for each group in the plot - groupsets: a dictionary containing GroupSet objects for each group in the plot - Methods: - - __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): initializes the plot by calling the plot() method with the given parameters - plot(self, parent, file, filtereddfs, groupsets): generates the plot with the given data. Reads the ion dictionary from a csv file and reads the filtered data from a csv file generated by the program. Calculates the Spearman correlation matrix and generates a heatmap plot using the Seaborn library. Adjusts the layout of the plot and draws it on the parent canvas. + Sample-correlation heatmap, with a Method/View/label switcher bar + inserted into the *shared* Group Analysis nav bar (frame_12 / + horizontalLayout_25, alongside the pre-existing "Sets"/"Sample + Correlations" buttons) rather than this plot's own canvas -- unlike + plot_dendrogram/plot_ordination's per-canvas bars, these controls only + apply to this page, and the nav bar is shared with the UpSet plot page. + UIFunctions.switch_grpanalysis_tab (ui_functions.py) greys the controls + out when the UpSet page is active. + + Views (same collapsing semantics as plot_dendrogram/plot_ordination, + via ordination.load_ordination_matrix): + - "Biological Replicates" (default, matches this plot's previous, + checkbox-less behaviour): technical replicates averaged per Sample, + biological replicates kept separate. + - "Individual Injections": no averaging, every injection is its own + row/column. 
+ - "Biological Groups": both technical and biological replicates + averaged together, one row/column per Biolgroup -- "see only + biological groups" with technical-replicate averaging implied. + + Methods (ordination.similarity_matrix): + - "Spearman" (default): rank correlation of abundance profiles. + - "Jaccard": presence/absence (feature detected or not), ignoring + abundance. + - "Bray-Curtis": abundance-weighted, the standard ecology/metabolomics + similarity measure. + + "Use Sample/Group Names" checkbox: same nomenclature as the + dendrogram's -- ``<Biolgroup>_b<#>_s<#>`` (Individual + Injections), ``<Biolgroup>_b<#>`` (Biological Replicates), or the + raw Biolgroup name (Biological Groups -- nothing left to shorten). """ + + VIEWS = ('Biological Replicates', 'Individual Injections', 'Biological Groups') + METHODS = ('Spearman', 'Jaccard', 'Bray-Curtis') + def __init__(self, parent, currplt, frame, file, filtereddfs, groupsets): ui_plot.__init__(self, parent, currplt, frame) self.parent = parent self.currplt = currplt + self.view = 'Biological Replicates' + self.method = 'Spearman' + self.use_sample_names = False + self._build_grpanalysis_controls(parent) self.plot(parent, file, filtereddfs, groupsets) - + + def _build_grpanalysis_controls(self, parent): + bar = QtWidgets.QWidget() + bar.setStyleSheet(_SWITCHER_BAR_STYLE) + bar.setMaximumHeight(_SWITCHER_BAR_HEIGHT) + layout = QtWidgets.QHBoxLayout(bar) + layout.setContentsMargins(4, 2, 4, 2) + + layout.addWidget(QtWidgets.QLabel('Method:')) + method_combo = QtWidgets.QComboBox() + method_combo.addItems(self.METHODS) + method_combo.setCurrentText(self.method) + method_combo.currentTextChanged.connect(self._on_method_changed) + layout.addWidget(method_combo) + + layout.addWidget(QtWidgets.QLabel('View:')) + view_combo = QtWidgets.QComboBox() + view_combo.addItems(self.VIEWS) + view_combo.setCurrentText(self.view) + view_combo.currentTextChanged.connect(self._on_view_changed) + layout.addWidget(view_combo) + + use_names_check = 
QtWidgets.QCheckBox('Use Sample/Group Names') + use_names_check.setChecked(self.use_sample_names) + use_names_check.toggled.connect(self._on_use_sample_names_toggled) + layout.addWidget(use_names_check) + + self.method_combo = method_combo + self.view_combo = view_combo + self.use_names_check = use_names_check + self.controls_bar = bar + + # Pushes the new controls to the right of the pre-existing + # Sets/Sample Correlations buttons within the same shared bar, + # rather than editing the generated horizontalLayout_25 itself. + parent.ui.horizontalLayout_25.addStretch(1) + parent.ui.horizontalLayout_25.addWidget(bar) + + def set_controls_enabled(self, enabled): + """Grey the Method/View/Use-Names controls out when the UpSet Plot + page (the nav bar's other tab) is active -- they don't apply there.""" + self.controls_bar.setEnabled(enabled) + + def _on_method_changed(self, method): + self.method = method + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + + def _on_view_changed(self, view): + self.view = view + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + + def _on_use_sample_names_toggled(self, checked): + self.use_sample_names = checked + self.reset(self._last_file, self._last_filtereddfs, self._last_groupsets) + + def _load_matrix(self, parent): + pltfile = parent.analysis_paramsgui.outputdir / (parent.analysis_paramsgui.filename.stem + '_filtered.csv') + raw_header = cached_read_csv(pltfile, sep=',', header=None, index_col=[0, 1, 2]).iloc[:3, :].transpose() + x, biolgroup = ordination.load_ordination_matrix( + pltfile, raw_header.copy(), collapse_replicates=(self.view != 'Individual Injections')) + if self.view == 'Biological Groups': + x = x.groupby(biolgroup).mean() + return x, raw_header + + def _display_labels(self, raw_header, leaf_names): + """Build short ``Biolgroup_b#[_s#]`` labels, mirroring + plot_dendrogram's ``_display_labels`` -- nomenclature switches on + the active View the same way.""" + if 
self.view == 'Biological Groups': + return leaf_names # already bare Biolgroup names + components = ordination.replicate_label_components(raw_header) + if self.view == 'Biological Replicates': + per_sample = components.drop_duplicates('Sample').set_index('Sample') + return [f"{per_sample.loc[sample, 'Biolgroup']}_b{per_sample.loc[sample, 'BioRep']}" for sample in leaf_names] + return [ + f"{components.loc[injection, 'Biolgroup']}_b{components.loc[injection, 'BioRep']}_s{components.loc[injection, 'TechRep']}" + for injection in leaf_names + ] + def plot(self, parent, file, filtereddfs, groupsets): - iondict = cached_read_csv(self.parent.analysis_paramsgui.outputdir / 'iondict.csv', sep=',', header=[0], index_col=None) - msdata = cached_read_csv(self.parent.analysis_paramsgui.outputdir / (self.parent.analysis_paramsgui.filename.stem + '_filtered.csv'), sep=',', header=[0, 1, 2], index_col=[0, 1, 2]) - try: - msdata = msdata.stack([0, 1, 2], future_stack=True).groupby(level=[0, 1, 2, 3, 4]).mean().droplevel(level=3, axis=0).unstack() - except TypeError: - msdata = msdata.stack([0, 1, 2]).groupby(level=[0, 1, 2, 3, 4]).mean().droplevel(level=3, axis=0).unstack() - msdata.index = msdata.index.droplevel([1, 2]) - pmatrix = msdata.corr(method='spearman') + self._last_file = file + self._last_filtereddfs = filtereddfs + self._last_groupsets = groupsets + + x, raw_header = self._load_matrix(parent) + pmatrix = ordination.similarity_matrix(x, self.method) + + leaf_names = pmatrix.columns.tolist() + display_labels = self._display_labels(raw_header, leaf_names) if self.use_sample_names else leaf_names + fig = self.parent.fig[self.currplt] - ax = self.parent.ax[self.currplt] - # Remove any axes left over from a previous run (notably the colorbar - # that sns.heatmap appends). Without this a new colour-legend bar is - # stacked onto the figure every time the plot is regenerated. 
- for extra_ax in list(fig.axes): - if extra_ax is not ax: - extra_ax.remove() - ax.clear() + # clf() + add_subplot: sns.heatmap permanently shrinks the axes each + # call to make room for its colorbar; removing the extra colorbar axes + # afterwards doesn't restore the original size. Starting fresh each + # call keeps the axes at a consistent width. + fig.clf() + ax = fig.add_subplot(111) + ax.set_facecolor(self.plotbackground) + ax.set_axisbelow(True) + self.parent.ax[self.currplt] = ax + # Spearman is mathematically capable of going negative, but real + # sample-vs-sample correlations in practice cluster tightly positive + # (e.g. 0.7-1.0) -- a -1..1 scale would compress all of that + # meaningful variation into a sliver of the colour range. 0..1 for + # every method keeps the full range informative. sns.heatmap(pmatrix, ax=ax, cmap=self.parent.analysis_paramsgui.colorscheme, vmin=0, vmax=1) ax.tick_params(axis='both', which='both', labelsize=10) ax.set_xticks(range(len(pmatrix.columns))) - ax.set_xticklabels(pmatrix.columns, rotation=90) + ax.set_xticklabels(display_labels, rotation=90) ax.set_yticks(range(len(pmatrix.index))) - ax.set_yticklabels(pmatrix.index, rotation=0) + ax.set_yticklabels(display_labels, rotation=0) ax.axes.get_xaxis().get_label().set_visible(False) ax.axes.get_yaxis().get_label().set_visible(False) + ax.set_title(self.method, fontsize=10) self.parent.fig[self.currplt].subplots_adjust(left=.1, right=.95, bottom=0.15, top=0.9, hspace=0.2, wspace=0.2) self.parent.canvas[self.currplt].draw() - - + + class kendrick(ui_plot): """ The purpose of this class is to plot the mass defect versus the nominal mass of compounds based on the input files and parameters provided. @@ -816,8 +928,9 @@ class plot_dendrogram(ui_plot): Coloring: - "Purity": green wherever a branch's leaves are entirely one group - (correctly clustered), red wherever a branch mixes more than one - group (polyphyletic). 
+ (correctly clustered), magenta wherever a branch mixes more than one + group (polyphyletic). Green/magenta rather than the more conventional + green/red since red-green colorblindness can't distinguish the latter. - "None": plain black dendrogram, no purity coloring or title -- the tab's original (pre-purity-coloring) appearance. @@ -969,9 +1082,11 @@ def plot(self, parent, file, filtereddfs, groupsets): Z = shc.linkage(data_for_linkage, method='ward') if self.color_mode == 'Purity': - # Green = monophyletic (correctly clustered); red = polyphyletic - # (mixes more than one group). - link_color_func = clusterpurity.purity_link_color_func(Z, leaf_labels, true_color='green', false_color='red') + # Green = monophyletic (correctly clustered); magenta = + # polyphyletic (mixes more than one group). Magenta rather than + # the conventional red -- distinguishable from green under + # red-green colorblindness, the most common form. + link_color_func = clusterpurity.purity_link_color_func(Z, leaf_labels, true_color='green', false_color='magenta') else: link_color_func = None # plain black dendrogram, scipy's own default @@ -1010,10 +1125,21 @@ def plot(self, parent, file, filtereddfs, groupsets): border-radius: 2px; padding: 2px; } +QComboBox:disabled { + background-color: rgb(220,220,220); + color: rgb(150,150,150); + border: 1px solid rgb(195,195,195); +} QLabel { color: rgb(30,30,30); background: transparent; } +QLabel:disabled { + color: rgb(150,150,150); +} +QCheckBox:disabled { + color: rgb(150,150,150); +} """ diff --git a/code/tests/test_clusterpurity.py b/code/tests/test_clusterpurity.py index 87cc3de..490544e 100644 --- a/code/tests/test_clusterpurity.py +++ b/code/tests/test_clusterpurity.py @@ -72,18 +72,18 @@ def test_purity_link_color_func_clean_disjoint_groups_stay_neutral_even_at_root( assert color_func(node_id) == 'green' -def test_purity_link_color_func_overlap_is_the_only_red_and_it_does_not_cascade(): +def 
test_purity_link_color_func_overlap_is_the_only_false_color_and_it_does_not_cascade(): Z, labels = _scattered_pair_linkage() color_func = purity_link_color_func(Z, labels) - assert color_func(5) == 'green' # Q+Q, monophyletic - assert color_func(6) == 'black' # P + {Q}: disjoint, clean bridge - assert color_func(7) == 'red' # P + {P,Q}: OVERLAP -- the actual tangle + assert color_func(5) == 'green' # Q+Q, monophyletic + assert color_func(6) == 'black' # P + {Q}: disjoint, clean bridge + assert color_func(7) == 'magenta' # P + {P,Q}: OVERLAP -- the actual tangle # R joining afterward is disjoint from {P,Q} -- R was never part of the - # P/Q mixing, so this must NOT also render red just because it's above - # (contains) the node-7 tangle. This is the specific behaviour this - # rule exists for: a real, low-level tangle must not paint every - # ancestor red all the way to the root. + # P/Q mixing, so this must NOT also render false_color just because it's + # above (contains) the node-7 tangle. This is the specific behaviour + # this rule exists for: a real, low-level tangle must not paint every + # ancestor false_color all the way to the root. 
assert color_func(8) == 'black' diff --git a/code/tests/test_ordination.py b/code/tests/test_ordination.py index f8dfc48..992434e 100644 --- a/code/tests/test_ordination.py +++ b/code/tests/test_ordination.py @@ -13,7 +13,7 @@ from ordination import ( load_ordination_matrix, nmds_loading_proxy, replicate_label_components, - run_nmds, run_pca, run_plsda, top_loadings, + run_nmds, run_pca, run_plsda, similarity_matrix, top_loadings, ) @@ -186,6 +186,50 @@ def test_nmds_smoke_test_on_clustered_data(): assert proxy.values.min() >= -1.0001 and proxy.values.max() <= 1.0001 +# --------------------------------------------------------------------------- # +# similarity_matrix +# --------------------------------------------------------------------------- # + +def test_similarity_matrix_spearman_self_correlation_is_one(): + x = pd.DataFrame( + [[1.0, 2.0, 3.0], [3.0, 2.0, 1.0], [1.0, 5.0, 2.0]], + index=['s1', 's2', 's3'], columns=['f1', 'f2', 'f3'], + ) + sim = similarity_matrix(x, 'Spearman') + assert sim.shape == (3, 3) + assert np.allclose(np.diag(sim.values), 1.0) + # s1 and s2 are perfectly rank-anticorrelated. + assert sim.loc['s1', 's2'] == pytest.approx(-1.0) + + +def test_similarity_matrix_jaccard_identical_presence_is_one(): + # s1/s2 detect exactly the same features (different abundances); + # s3 detects none of them. 
+ x = pd.DataFrame( + [[5.0, 0.0, 2.0], [50.0, 0.0, 20.0], [0.0, 0.0, 0.0]], + index=['s1', 's2', 's3'], columns=['f1', 'f2', 'f3'], + ) + sim = similarity_matrix(x, 'Jaccard') + assert sim.loc['s1', 's2'] == pytest.approx(1.0) + assert np.allclose(np.diag(sim.values)[:2], 1.0) + + +def test_similarity_matrix_braycurtis_identical_profiles_is_one(): + x = pd.DataFrame( + [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [10.0, 0.0, 0.0]], + index=['s1', 's2', 's3'], columns=['f1', 'f2', 'f3'], + ) + sim = similarity_matrix(x, 'Bray-Curtis') + assert sim.loc['s1', 's2'] == pytest.approx(1.0) + assert sim.loc['s1', 's3'] < sim.loc['s1', 's2'] + + +def test_similarity_matrix_unknown_method_raises(): + x = pd.DataFrame([[1.0, 2.0]], index=['s1'], columns=['f1', 'f2']) + with pytest.raises(ValueError): + similarity_matrix(x, 'Pearson') + + # --------------------------------------------------------------------------- # # top_loadings # --------------------------------------------------------------------------- # diff --git a/code/ui_functions.py b/code/ui_functions.py index 0a49837..148629f 100644 --- a/code/ui_functions.py +++ b/code/ui_functions.py @@ -124,8 +124,8 @@ def uiDefinitions(self): self.ui.btn_cvplt.clicked.connect(lambda: self.ui.stackedWidget_review.setCurrentIndex(2)) self.ui.btn_datasummary.clicked.connect(lambda: self.ui.stackedWidget_review.setCurrentIndex(3)) - self.ui.btn_upsetplt.clicked.connect(lambda: self.ui.stackedWidget_grpanalysis.setCurrentIndex(0)) - self.ui.btn_samplecorr.clicked.connect(lambda: self.ui.stackedWidget_grpanalysis.setCurrentIndex(1)) + self.ui.btn_upsetplt.clicked.connect(lambda: UIFunctions.switch_grpanalysis_tab(self, 0)) + self.ui.btn_samplecorr.clicked.connect(lambda: UIFunctions.switch_grpanalysis_tab(self, 1)) #feature info bar functions self.ftrdialog.ui.btn_close.clicked.connect(lambda: self.ftrdialog.hide()) @@ -230,6 +230,15 @@ def goto_review(self): self.dialog.ui.checkBox_applyfilter.hide() + def switch_grpanalysis_tab(self, 
idx): + """Switch the Group Analysis sub-tab (UpSet Plot=0, Sample + Correlations=1) and grey out plot_samplecorr's Method/View/Use-Names + controls -- shared with btn_upsetplt/btn_samplecorr in frame_12 -- + whenever the UpSet Plot tab is active, since they don't apply there.""" + self.ui.stackedWidget_grpanalysis.setCurrentIndex(idx) + if getattr(self, 'samplecorr', None) is not None: + self.samplecorr.set_controls_enabled(idx == 1) + def goto_upset(self): self.ui.stackedWidget_infobar.setCurrentIndex(1) self.ui.stackedWidget_plot.setCurrentIndex(9) diff --git a/devnotes.md b/devnotes.md index ece3cf0..cb6689e 100644 --- a/devnotes.md +++ b/devnotes.md @@ -309,8 +309,8 @@ documented further down, formerly/newly local to this tab respectively): group, i.e. the groups are separable. - **Color** — how to render purity: - **Purity** (default): green wherever a branch's leaves are entirely one - group (correctly clustered), red wherever a branch mixes more than one - group (polyphyletic) — a QC judgment visible at a glance rather than + group (correctly clustered), magenta wherever a branch mixes more than + one group (polyphyletic) — a QC judgment visible at a glance rather than read off leaf labels one at a time. The plot title reports `n_pure/n_total` (e.g. "7/9 samples' replicates clustered together", "3/3 biological groups separable") via `clusterpurity.purity_summary()`. @@ -326,18 +326,19 @@ documented further down, formerly/newly local to this tab respectively): Both views' purity math is the same Qt-free linkage-traversal logic in `clusterpurity.py`, unit-tested in `tests/test_clusterpurity.py`. -- **Red marks proven non-monophyly (overlap), not "any impure merge"**: two - earlier attempts both got this wrong in opposite directions. 
First, every - impure merge was colored red, including every ancestor above a single - mixing event all the way to the root -- since almost any real dataset has - *some* mixing somewhere, this painted most of the tree's upper structure - red regardless of how localized the problem was. The second attempt - ("impure but at least one child was pure = bridge = red, both children - already impure = neutral") fixed the worst of the cascading but still - mis-colored real data: it could still mark a high-level merge red merely - because one side happened to be a single freshly-introduced pure clade, - *and* it could miss real tangles where two already-impure children share - a label without one side being trivially pure. +- **`false_color` marks proven non-monophyly (overlap), not "any impure + merge"**: two earlier attempts both got this wrong in opposite directions. + First, every impure merge was colored `false_color`, including every + ancestor above a single mixing event all the way to the root -- since + almost any real dataset has *some* mixing somewhere, this painted most of + the tree's upper structure `false_color` regardless of how localized the problem was. + The second attempt ("impure but at least one child was pure = bridge = + `false_color`, both children already impure = neutral") fixed the worst + of the cascading but still mis-colored real data: it could still mark a + high-level merge `false_color` merely because one side happened to be a + single freshly-introduced pure clade, *and* it could miss real tangles + where two already-impure children share a label without one side being + trivially pure. `purity_link_color_func()` now compares the two children's label sets directly at each merge: @@ -350,18 +351,26 @@ Both views' purity math is the same Qt-free linkage-traversal logic in in, every merge above it only ever joins disjoint regions, so it goes back to black.
- **overlap** (share >=1 label, without being identical-and-singleton) -> - polyphyletic (`false_color`/red) -- definitive proof that some label's - leaves are split across this exact merge (present on both sides), not - just "still mixed from an earlier merge". + polyphyletic (`false_color`/magenta) -- definitive proof that some + label's leaves are split across this exact merge (present on both + sides), not just "still mixed from an earlier merge". Verified against the real example dataset's bootstrap dendrogram (the case that exposed both earlier bugs): only the two merges that actually re-unite a scattered sample's replicates (e.g. one sample's reps split into two non-sister sub-clades that only meet again higher up) render - red; the higher-level merges joining that region with cleanly-resolved, - unrelated samples stay black, same as a hand-built synthetic linkage - (`tests/test_clusterpurity.py`'s `_scattered_pair_linkage`) reproducing - the same pattern deterministically. + `false_color`; the higher-level merges joining that region with + cleanly-resolved, unrelated samples stay black, same as a hand-built + synthetic linkage (`tests/test_clusterpurity.py`'s + `_scattered_pair_linkage`) reproducing the same pattern deterministically. + + `true_color`/`false_color` default to green/magenta, not the more + conventional green/red: red-green colorblindness (the most common form) + can't distinguish red from green, while magenta stays distinguishable + from green under all common forms of color vision deficiency. (Changed + from an original green/red default after user feedback; see + `clusterpurity.py`'s `purity_link_color_func()` default args and + `plotting.py`'s `plot_dendrogram.plot()` call site.) 
- **Bootstrap is now a per-tab checkbox, not a global one**: the plot-config dialog's "Bootstrap Analysis" checkbox (`checkBox_bootstrap`) only ever affected this one plot, so it moved into `plot_dendrogram`'s own @@ -498,6 +507,68 @@ other plot already worked, not a new inconsistency. old axes/figures were leaking), and (3) no PNG got written to disk by either plot anymore. +## Sample correlation matrix (`plotting.plot_samplecorr`, `ordination.similarity_matrix`) + +Used to be a hardcoded Spearman-only heatmap with technical replicates +always pre-averaged and no way to relabel the raw injection/sample names. +Now has a Method (Spearman/Jaccard/Bray-Curtis) switcher, a View +(Biological Replicates/Individual Injections/Biological Groups) switcher, +and a "Use Sample/Group Names" checkbox — same nomenclature and +`ordination.replicate_label_components()` reuse as `plot_dendrogram`'s. + +- **`ordination.similarity_matrix(x, method)`** is the new Qt-free backend + (covered by `test_ordination.py`): `x` is samples x features, same + convention as `run_pca`/`run_nmds`/`run_plsda`. Spearman is + `x.transpose().corr(method='spearman')`; Jaccard/Bray-Curtis go through + `sklearn.metrics.pairwise_distances` (`metric='jaccard'`/`'braycurtis'`) + and return `1 - distance`. Jaccard is computed on `x > 0` (presence/ + absence of detection, ignoring abundance) — deliberately *not* derived + from the groupset query-dict machinery the user floated as a possible + source, since that's per-feature-list bookkeeping for the UpSet/treemap + tabs, a different concept from per-sample/group detection. + Pearson/Kendall were considered and rejected: Pearson assumes + normally-distributed abundances (the wrong fit for heavy-tailed LC-MS + intensities, same reasoning that makes Spearman the established choice + here), Kendall is a slower, largely redundant rank-correlation + alternative to Spearman. 
+- **Controls live in the *shared* `frame_12`/`horizontalLayout_25` nav + bar** (the one holding the pre-existing `btn_upsetplt`/`btn_samplecorr` + buttons that switch `stackedWidget_grpanalysis`), not in this plot's own + canvas frame — unlike `plot_dendrogram`/`plot_ordination`'s per-canvas + switcher bars, these controls are specific to the Sample Correlations + page but the nav bar is shared with the unrelated UpSet Plot page. + `plot_samplecorr._build_grpanalysis_controls()` appends a stretch then + its own control widget onto the existing layout (no `ui_main.py` edit) + so the new controls sit to the right of the two buttons and the bar + stays a single row regardless of window width. +- **Greying out on the UpSet Plot tab**: `ui_functions.py`'s + `btn_upsetplt`/`btn_samplecorr` click handlers used to call + `stackedWidget_grpanalysis.setCurrentIndex()` directly; they now route + through `UIFunctions.switch_grpanalysis_tab(self, idx)`, which also calls + `self.samplecorr.set_controls_enabled(idx == 1)` (guarded by + `getattr(self, 'samplecorr', None) is not None`, since this can fire + before any analysis has run and created the plot object). The Designer + default for `stackedWidget_grpanalysis` is already index 1 + (Sample Correlations), so the controls start enabled, matching the + default active page. +- **View → row/column construction**: "Biological Replicates" and + "Individual Injections" reuse `ordination.load_ordination_matrix()` + exactly like `plot_dendrogram` does (`collapse_replicates=True`/`False`). + "Biological Groups" takes the collapsed (Biological Replicates) matrix + and does one more `x.groupby(biolgroup).mean()` to average across + biological replicates too — deliberately not a third mode inside + `load_ordination_matrix` itself, since it's a trivial one-line reduction + of an already-correct, already-tested intermediate result. +- **Heatmap `vmin` is `0` for all three methods**, including Spearman. 
+ Spearman is mathematically capable of going negative, but real + sample-vs-sample correlations in this kind of data cluster tightly + positive (e.g. 0.7-1.0) — a `-1..1` scale (tried first) compressed all of + that meaningful variation into a sliver of the colour range, making the + heatmap look uniformly dark/uninformative. `0..1` keeps the full colour + range usable for the variation that actually occurs. +- Dropped the dead `iondict = cached_read_csv(...)` read that was never + actually used by the old `plot()` body. + ## Conventions - Don't edit the generated UI files (above). Put behaviour in `main.py` / diff --git a/docs/changelog.md b/docs/changelog.md index c9475ef..82b796d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -22,8 +22,10 @@ Replicates), **Color** (Purity / None), **Bootstrap**, and **Use Sample/Group Names**. - Purity coloring: green branches are cleanly within-sample/group; - red branches mark the exact merge point where two groups' leaves - overlap (proven non-monophyly). A title reports how many + magenta branches mark the exact merge point where two groups' leaves + overlap (proven non-monophyly) — magenta rather than the more + conventional red, since red-green colorblindness (the most common + form) can't distinguish red from green. A title reports how many samples/groups are fully correctly clustered. - "Use Sample/Group Names" checkbox: replaces raw injection/file names with `_b<#>_s<#>` (Technical Replicates view) or @@ -32,6 +34,12 @@ - Fixed AU/BP annotation alignment: labels now stay a constant pixel gap apart regardless of leaf count, with leaf-count-scaled font size. - `fastcluster` optional acceleration for bootstrap linkage. 
+- Sample Correlation Matrix rework: a **Method** switcher (Spearman / + Jaccard / Bray-Curtis), a **View** switcher (Individual Injections / + Biological Replicates / Biological Groups), and a "Use Sample/Group + Names" checkbox, all in the nav bar shared with the UpSet Plot tab + (greyed out while that tab is active). The heatmap scale is fixed to + 0-1 for all three methods. - UpSet and treemap plots are now rendered directly on a Qt canvas (replacing a PNG round-trip). - Headless unit test suite (`code/tests/`). diff --git a/docs/development.md b/docs/development.md deleted file mode 100644 index 0879d35..0000000 --- a/docs/development.md +++ /dev/null @@ -1,122 +0,0 @@ -# Development - -This page is for people contributing to MPACT itself, not end users. For -the authoritative, most up-to-date version of these notes (kept alongside -the code), see [`devnotes.md`](https://github.com/BalunasLab/mpact/blob/main/devnotes.md) -in the repo root. - -## Architecture - -- **Generated — do not edit:** `ui_main.py`, `ui_main1.py`, - `ui_featureinfo.py`, `ui_plotparam.py`, `files.py`, `files_rc.py`. These - are Qt Designer output and get overwritten on regeneration. (Despite the - name, `ui_functions.py` is hand-written and fully editable — it's the - `UIFunctions` controller class.) -- **Hand-written app code:** `main.py` (`MainWindow`, run/save/load, - database search), `plotting.py` (plot classes), `filter.py`, `stats.py`, - `MSFaST.py` (analysis driver), `pvclust.py` (bootstrap dendrogram), - `ordination.py` (Qt-free PCA/NMDS/PLS-DA backend), - `clusterpurity.py` (dendrogram branch-purity logic), - `csvcache.py` (cached CSV reads for the ordination data path), - `translators.py` (import/export framework), `mzmineimport.py` (format - conversion), `getfragdb.py`, `mspwriter.py`. 
-- **Canonical peak table** format (what `MSFaST` consumes internally; - Progenesis is the native/baseline format): CSV with 3 header rows, row 2 - = `Compound,m/z,Retention time (min),`, col0 = `RT_mz` id, - col1 = m/z, col2 = RT. - -## Threading model - -`run_MSFaST` is Qt-free and runs on a `QThread` worker (`AnalysisWorker` in -`main.py`), so the GUI stays responsive during the heavy compute. -`MainWindow.run_analysis` reads widgets on the main thread, starts the -worker, and `_finish_analysis` does all matplotlib/Qt plotting back on the -**main thread** (matplotlib is not thread-safe). Never create Qt/matplotlib -objects on the worker thread. - -## Importer/translator framework (`translators.py`) - -Qt-free and unit-tested: `detect_peaktable_format`, `parse_msp`/ -`parse_mgf` (→ `FragmentEntry`), `reindex_fragments` (matches fragments to -peak-table rows by compound ID first, then m/z+RT — Progenesis MSP stores -neutral mass, not adduct m/z), `filter_source_peaktable` (row-subsets the -source peak table to surviving features). `mzmineimport.format_check` -delegates detection to this module. - -## Groupsets MVC (`groupsets.py`) - -`GroupSet` (data) + `GroupSetModel` (collection, bounds-safe selection, -CRUD) + `build_query_dict()` replace what used to be a bare list + -selected-index pair. `MainWindow.groupsetmodel` is the live state; -`ui_functions.py`'s `addgroup`/`removegroup`/`updatesets`/`updategroups`/ -`writegroups`/`colour_picker1` are thin view-sync controllers over it. -`main.py`'s `query` class still exists, but **only** as the unpickle target -for old `.mpct` files — `GroupSet.from_legacy`/`GroupSetModel.from_legacy_list` -convert on load. - -## Testing - -Headless unit tests live in `code/tests/` (pure-logic only — no Qt): - -``` -python -m pytest code/tests -q -``` - -Covers `filter`, `stats`, `importdependencies`, `translators`, -`groupsets`, and `ordination`. Add tests here for any new Qt-free logic. 
-GUI behaviour can't be tested headlessly — verify it by running the app. - -## Conventions - -- Never edit the generated UI files listed above. -- Plot generation goes through `MainWindow.safe_generate`, so one failing - plot doesn't abort the rest. -- `.mpct` saves are atomic (temp file + `os.replace`), with per-component - guards (`write_save`). -- `loadsession` restores each saved parameter independently — a bad/missing - field can't cascade and abort restoration of the rest. Add new analysis - parameters to **both** `enumerate_inputs` (save) and `loadsession` - (restore). -- Plot objects (`self.ftplt`, `self.kmd`, `self.spec`, ...) are created the - first time they're needed and `.reset()` afterward, via - `MainWindow._create_or_reset()` / `_generate_plots()` — never gate - create-vs-reset on a whole-session flag like `self.analysisrun`, since an - optional output can newly turn on mid-session for a dataset that didn't - have it before, and the object would never get created. -- Use `MainWindow._refresh_highlight()` (not `highlight_feature()`) to - redraw the current selection without changing it (e.g. on a tab switch). - `highlight_feature(newfeature)` is for real selection events and toggles - the highlight off if the same feature is clicked twice — calling it with - the already-selected feature re-triggers that toggle, which is a bug, not - a refresh. -- Every matplotlib `pick_event` handler must call - `plotting._is_duplicate_pick(parent, event)` first and bail if it - returns `True`. Matplotlib fires one `pick_event` per artist that - registers a hit, not one per click — a feature plotted in more than one - groupset/colour layer otherwise fires the handler twice per click. -- `importdependencies.checkdep()` should stay silent when nothing needs - installing — it runs on every launch, including every Spyder "Run File" - (which re-executes `main.py`'s top level). Only report actual - installs/failures. 
- -## Building the docs site locally - -``` -pip install mkdocs mkdocs-material -mkdocs serve -``` - -Then open `http://127.0.0.1:8000`. See [Hosting](#hosting-this-site) below -for deployment. - -## Hosting this site - -This site is plain static HTML generated by MkDocs — there's no backend, -database, or server-side logic, so it needs essentially no infrastructure. -**GitHub Pages is the right fit here**: it's free, MkDocs has built-in -support for deploying to it (`mkdocs gh-deploy`), and a low-traffic docs -site for a research tool doesn't need a dedicated host. A `gh-pages` -deploy workflow is included in this repo (`.github/workflows/docs.yml`) — -once GitHub Pages is enabled for this repo (Settings → Pages → Source: -`gh-pages` branch), the site builds and publishes automatically on every -push to `main` that touches `docs/` or `mkdocs.yml`. diff --git a/docs/index.md b/docs/index.md index f61cc62..da22288 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,7 +11,9 @@ plots, heatmaps, and per-feature spectral/database-match lookup. This site covers installing and running MPACT, the file formats it expects, the analysis and filtering options, and what each plot/tab -shows. If you're contributing to MPACT itself, see [Development](development.md). +shows. If you're contributing to MPACT itself, see +[`devnotes.md`](https://github.com/robertsamples/mpact/blob/main/devnotes.md) +in the repo root. ## Where to start @@ -30,5 +32,6 @@ shows. If you're contributing to MPACT itself, see [Development](development.md) rework (PCA/NMDS/PLS-DA with scores and loadings views), and the dendrogram rework (purity coloring, view/bootstrap/label switchers). Some screenshots referenced in the original guide have not been - re-captured yet — see [Development](development.md) if you'd like to - contribute updated images. + re-captured yet — see + [`devnotes.md`](https://github.com/robertsamples/mpact/blob/main/devnotes.md) + if you'd like to contribute updated images. 
diff --git a/docs/plots/group-analysis.md b/docs/plots/group-analysis.md index 732a98b..46eef57 100644 --- a/docs/plots/group-analysis.md +++ b/docs/plots/group-analysis.md @@ -12,14 +12,51 @@ combination of groups (top bar chart + dot matrix). ![UpSet plot](../images/upset-plot.png) *MPACT UpSet plot showing the distribution of features across sample sets.* -## Spearman Correlation Matrix - -Pairwise Spearman correlation between every group, useful for evaluating -overall metabolomic similarity at a glance. Colour scheme is configurable -in the plot options dialog. - -![Spearman correlation matrix](../images/spearman-matrix.png) -*MPACT Spearman correlation matrix.* +## Sample Correlation Matrix + +Pairwise similarity between samples/groups, useful for evaluating overall +metabolomic similarity at a glance. Colour scheme is configurable in the +plot options dialog. + +A settings bar shared with the UpSet Plot tab (the same bar holding the +"Sets"/"Sample Correlations" buttons) controls how it's drawn, and redraws +immediately on any change. These controls are greyed out while the UpSet +Plot tab is active, since they don't apply there. + +**Method** — which similarity measure to compute: + +- **Spearman** (default): rank correlation of abundance profiles, robust + to the non-normal, heavy-tailed abundance distributions typical of + LC-MS data. Mathematically ranges -1 to 1, but the heatmap scale is fixed + to 0-1 since real sample correlations cluster tightly positive in + practice — a -1-to-1 scale would compress that variation into an + unreadable sliver of the colour range. +- **Jaccard**: presence/absence similarity — based only on which features + are detected in each sample/group, ignoring how much. Useful when + detection (not relative abundance) is what you care about. Ranges 0 to 1. +- **Bray-Curtis**: abundance-weighted similarity, the standard measure in + ecology/metabolomics (same convention as the Multivariate Analysis tab's + NMDS). Ranges 0 to 1. 
+ +**View** — which rows/columns to correlate: + +- **Biological Replicates** (default): technical replicates are averaged + together first (one row/column per sample), so the matrix reflects + biological/treatment-group similarity without technical noise. +- **Individual Injections**: no averaging — every injection is its own + row/column. +- **Biological Groups**: both technical and biological replicates are + averaged together — one row/column per treatment group, for "see only + biological groups" at a glance. + +**Use Sample/Group Names** — same nomenclature as the dendrogram's: when +checked, labels switch from the raw injection/file names to +`_b<#>_s<#>` (Individual Injections view), `_b<#>` +(Biological Replicates view), or the bare group name (Biological Groups +view, nothing left to shorten). + +![Sample correlation matrix](../images/spearman-matrix.png) +*MPACT sample correlation matrix.* ## Dendrogram @@ -49,16 +86,18 @@ immediately when changed: - **Purity** (default): a branch is colored **green** if every leaf beneath it belongs to the same sample (Technical Replicates view) or the same treatment group (Biological Replicates view) — i.e. it's correctly, - unambiguously clustered. A branch is colored **red** if it's the specific - point where two different samples/groups' leaves are proven to overlap - (some of that sample's/group's replicates are on each side of the + unambiguously clustered. A branch is colored **magenta** if it's the + specific point where two different samples/groups' leaves are proven to + overlap (some of that sample's/group's replicates are on each side of the split) — a real sign of poor clustering, not just "still mixed from - somewhere lower in the tree." Every other branch (a clean join of two - unrelated, already-resolved regions) stays black, even if it sits above a - red branch elsewhere in the tree — so a single tangled sample doesn't - paint the whole tree red. 
The plot title reports how many - samples/groups are *fully* correctly clustered (e.g. "7/9 samples' - replicates clustered together"). + somewhere lower in the tree." (Magenta rather than the more conventional + red, since red-green colorblindness — the most common form — can't tell + red and green apart; magenta stays distinguishable from green.) Every + other branch (a clean join of two unrelated, already-resolved regions) + stays black, even if it sits above a magenta branch elsewhere in the tree + — so a single tangled sample doesn't paint the whole tree magenta. The + plot title reports how many samples/groups are *fully* correctly + clustered (e.g. "7/9 samples' replicates clustered together"). - **None**: a plain, uncolored dendrogram with no title — useful if you just want the clustering shape without the QC overlay. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 8482fd4..d2cdef7 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -90,4 +90,6 @@ python -m pytest code/tests -q ``` GUI behaviour itself can't be tested headlessly and needs to be checked by -running the app — see [Development](development.md). +running the app — see +[`devnotes.md`](https://github.com/robertsamples/mpact/blob/main/devnotes.md) +in the repo root. diff --git a/docs/user-guide/analysis-settings.md b/docs/user-guide/analysis-settings.md index 1500af0..f7ba592 100644 --- a/docs/user-guide/analysis-settings.md +++ b/docs/user-guide/analysis-settings.md @@ -59,6 +59,6 @@ presence/absence in these groups, as indicated by Venn diagrams.* Internally, each Plot Feature Set is a `GroupSet` object managed by a small model/collection class (`GroupSetModel`) rather than a bare list + selected-index pair. This is purely an implementation detail (see - [Development](../development.md)) — old `.mpct` save files still load - correctly, with their saved feature sets converted into the current - representation automatically. 
+ [`devnotes.md`](https://github.com/robertsamples/mpact/blob/main/devnotes.md)) + — old `.mpct` save files still load correctly, with their saved feature + sets converted into the current representation automatically. diff --git a/mkdocs.yml b/mkdocs.yml index 3900d26..5b76a22 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -59,5 +59,4 @@ nav: - Heatmap: plots/heatmap.md - Feature Info: feature-info.md - Troubleshooting: troubleshooting.md - - Development: development.md - Changelog: changelog.md From 6a289020873ae25effcc7ac1d56d7457a2616b70 Mon Sep 17 00:00:00 2001 From: Robert Samples Date: Tue, 30 Jun 2026 01:37:03 -0400 Subject: [PATCH 16/16] Update tests.yml sklearn not added to tests, caused ci build test failure --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3990924..bbf1c86 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -25,7 +25,7 @@ jobs: # against a few system libraries the base image doesn't ship. - if: runner.os == 'Linux' run: sudo apt-get update && sudo apt-get install -y libgl1 libxkbcommon-x11-0 libxcb-cursor0 - - run: pip install "numpy<2" pandas scipy tqdm pytest PyQt5 + - run: pip install "numpy<2" pandas scipy scikit-learn tqdm pytest PyQt5 - run: python -m pytest code/tests -v lint: